/* Source: TeX CD-ROM July 1995 (Disc 1) (Walnut Creek, 1995),
   biblio/bibtex/utils/bibclean/bibclean.c
   Text file, 1992-11-23, 148KB, 5,639 lines */
/***********************************************************************
@C-file{
author = "Nelson H. F. Beebe",
version = "2.05",
date = "24 November 1992",
time = "10:14:50 MST",
filename = "bibclean.c",
address = "Center for Scientific Computing
Department of Mathematics
University of Utah
Salt Lake City, UT 84112
USA",
telephone = "+1 801 581 5254",
FAX = "+1 801 581 4148",
checksum = "65270 5638 19219 147882",
email = "beebe@math.utah.edu (Internet)",
codetable = "ISO/ASCII",
keywords = "prettyprint, bibliography",
supported = "yes",
docstring = {Prettyprint one or more BibTeX files on stdin,
or specified files, to stdout, and check
the brace balance and value strings as well.
Text outside @item-type{...} BibTeX entries
is passed through verbatim, except that
trailing blanks are trimmed.
BibTeX items are formatted into a consistent
structure with one key = "value" pair per
line, and the initial @ and trailing right
brace in column 1. Long values are split at a
blank and continued onto the next line with
leading indentation. Tabs are expanded into
blank strings; their use is discouraged
because they inhibit portability, and can
suffer corruption in electronic mail. Braced
strings are converted to quoted strings.
This format facilitates the later application
of simple filters to process the text for
extraction of items, and also is the one
expected by the GNU Emacs BibTeX support
functions.
Usage:
bibclean [ -author ] [ -error-log filename ]
[ -help ] [ '-?' ] [ -init-file filename ]
[ -[no-]check-values ]
[ -[no-]delete-empty-fields ]
[ -[no-]file-position ]
[ -[no-]fix-initials ] [ -[no-]fix-names ]
[ -[no-]par-breaks ]
[ -[no-]print-patterns ]
[ -[no-]read-init-files ]
[ -[no-]remove-OPT-prefixes ]
[ -[no-]scribe ]
[ -[no-]trace-file-opening ]
[ -[no-]warnings ] [ -version ]
<infile or bibfile1 bibfile2 bibfile3 ...
>outfile
The checksum field above contains a CRC-16
checksum as the first value, followed by the
equivalent of the standard UNIX wc (word
count) utility output of lines, words, and
characters. This is produced by Robert
Solovay's checksum utility.},
}
***********************************************************************/
/***********************************************************************
The formatting should perhaps be user-customizable; that is left for
future work.
The major goal has been to convert entries to the standard form
@item-type{citation-key,
key = "value",
key = "value",
...
}
while applying heuristics to permit early error detection. If
the input file is syntactically correct for BibTeX and LaTeX,
this is reasonably easy. If the file has errors, error recovery
is attempted, but cannot be guaranteed to be successful; however,
the output file, and stderr, will contain an error message that
should localize the error to a single entry where a human can
find it more easily than a computer can. To facilitate error
checking and recovery, the following conditions are used:
@ starts a BibTeX entry only if it occurs at brace
level 0 and is not preceded by non-blank text on
the same line.
" is significant only at brace level 1.
{} are expected to occur at @-level 1 or higher
} at beginning of line ends a BibTeX entry
Backslashes preceding these 4 characters remove their special
significance.
These heuristics are needed to deal with legal value strings like
{..."...}
"...{..}..."
and will flag as errors strings like
"...{..."
"...}..."
The special treatment of @ and } at beginning of line attempts to
detect errors in entries before the rest of the file is swallowed
up in an attempt to complete an unclosed entry.
The output bibliography file should be processed by BibTeX and
LaTeX without errors before discarding the original
bibliography file.
We do our own output and line buffering here so as to be able to
trim trailing blanks, and output data in rather large blocks for
efficiency (in filters of this type, I/O accounts for the bulk of
the processing, so large output buffers offer significant
performance gains).
The -scribe option enables recognition of the extended syntax used by
the Scribe document formatting system, originally developed by Brian
Reid at Carnegie-Mellon University, and now marketed by Unilogic, Ltd.
I have followed the syntax description in the Scribe Introductory
User's Manual, 3rd Edition, May 1980.
Scribe extensions include these features:
(1) Letter case is not significant in keywords and entry names, but
case is preserved in value strings.
(2) In key/value pairs, the key and value may be separated by one of
three characters: =, /, or space. Space may optionally surround these
separators.
(3) Value delimiters are any of these seven pairs:
{ } [ ] ( ) < > ' ' " " ` `
(4) Value delimiters may not be nested, even though nested balanced
delimiters would be unambiguous with the first four delimiter pairs.
(5) Delimiters can be omitted around values that contain only letters,
digits, sharp (#), ampersand (&), period (.), and percent (%).
(6) A literal at-sign (@) is represented by doubled at-signs (@@).
(7) Bibliography entries begin with @name, as for BibTeX, but any of
the seven Scribe value delimiters may be used to surround the
key/value pairs. As in (4), nested delimiters are forbidden.
(8) Arbitrary space may separate entry names from the following
delimiters.
(9) @Comment is a special command whose delimited value is discarded.
As in (4), nested delimiters are forbidden.
(10) The special form
@Begin{comment}
...
@End{comment}
permits encapsulating arbitrary text containing any characters or
delimiters, other than "@End{comment}". Any of the seven delimiters
may be used around the word comment following the @begin or @end.
(11) The "key" keyword is required in each bibliography entry.
(12) Semicolons may be used in place of "and" in author lists
(undocumented, but observed in practice).
Because of this loose syntax, error detection heuristics are less
effective, and consequently, Scribe mode input is not the default; it
must be explicitly requested.
========================================================================
***********************************************************************/
#define BIBCLEAN_VERSION "bibclean Version 2.05 [24-Nov-1992]"
/***********************************************************************
Revision history (reverse time order):
[16-Nov-1992 -- 24-Nov-1992] 2.05
Add Makefile steps to automatically extract help() text
from output of manual pages into new file bibclean.h, so
the built-in documentation stays up-to-date. The usage
messages still need manual adjustment if switches are
added or changed.
Add missing test of check_values in check_patterns().
Add support for optional warning messages with
patterns from initialization files. New function:
get_token(). New parsing code in do_new_pattern() to
handle optional warning message strings. Add message
argument to add_pattern().
Remove strip_comments() since comment processing is
now handled by get_token() and do_new_pattern(). This
permits unescaped comment characters inside quoted
strings.
Write bibclean.reg, an initialization file similar to
bibclean.ini, but with regular expressions.
Replace cascaded if statements for regular expression
testing with loop over patterns in check_patterns().
Move inclusion of match.h to after definition of
typedef YESorNO, and change type of match_pattern()
from int to YESorNO.
Add do_fileinit() and code in main() to call
do_fileinit() for each named input file with an
extension, replacing that extension with INITFILE_EXT
(default .ini). This adds a bibliography-specific
initialization capability to the system-wide,
user-wide, and job-wide files already supported.
Change -keep-initials and -keep-names to -fix-initials
and -fix-names, making them positive, rather than
negative, options. Also, make them independent by
moving invocations of fix_periods() outside of
fix_author(), and by checking fix_names in
fix_author() instead of at start of fix_namelist().
Add -[no-]read-init-files option to allow control over
which initialization files are read.
Add -[no-]trace-file-opening option to allow easy
tracing of file opening attempts by the program. A
similar feature in my DVI drivers has proved enormously
valuable in tracking down problems of missing files.
Rename entry_name[] to current_entry_name[], key[] to
current_key[], tag[] to current_tag[], and value[] to
current_value[] to get more distinctive names for
those global variables.
Include the value string matching code selection in the
version() message; this is needed so that users can
prepare initialization files with the correct pattern
syntax.
Make several MAX_xxx symbolic constants definable
at compile time.
Add MAX_PATTERN_NAMES constant, and increase
pattern_names[] table to that size, leaving empty slots
for expansion. Extend add_pattern() so that
unrecognized key names result in creation of new entries
in pattern_names[], making the set of key/value pairs
extensible without modification of the bibclean source
code. Add check_other() to handle checking of other
keywords.
Add unexpected() to localize issuing of unexpected value
warnings.
Repair next_s() in match.c to skip past <backslash><non-letter>
TeX control sequence; it was stopping one character
early.
Revise upper-case letter bracing code in fix_title()
to handle more cases.
Rewrite space collapsing code in fix_pages() to only
collapse space around en-dashes. The previous code
was too aggressive, so that "319 with 30 illustrations"
became "319 with30illustrations".
Add check_tag() called from do_tag_name(), and add
second argument, value, to check_patterns().
Add format() called from error() and warning() to
expand %e (@entry name), %k (key), %t (tag), %v
(value), and %% (percent) format items in messages.
This feature is needed so user-defined messages in
initialization files can get key, tag, and value into
messages. It also simplifies, and improves, calls to
warning() and error().
Add some missing (void) typecasts before str***() calls.
Change word_length() to return one more than true
length at end of string. Change tests in out_s() to
> MAX_COLUMN instead of >= MAX_COLUMN. Previously, if
a line ended exactly at column MAX_COLUMN, bibclean
could produce a spurious blank line, and would
sometimes wrap a line earlier than necessary. Add
additional punctuation wrap points in out_s(), and
remove tests for non-blank whitespace in switch()
statement.
Change type of all string index variables from int to
size_t.
In get_simple_string(), use enum type for type codes if
NEW_STYLE.
In check_year(), validate all sequences of 1 or more
digits.
Use the C preprocessor to define memmove() to be
Memmove(), so we always use our own version. Too many
C and C++ implementations were found to be lacking it,
sigh... Similarly, we provide our own version of
strtol() (in a separate file) from the DVI 3.0
development, because it too is missing from older UNIX
systems.
Complete port to IBM PC DOS with Turbo C 2.0, and
Turbo C and C++ 3.0. This required economization of
storage for arrays of size [MAX_TOKEN_SIZE] to get
global data below 64KB without having to reduce
MAX_TOKEN.
Added code in do_more() and preprocessor conditionals
in out_lines() to handle character-at-a-time input for
help paging on IBM PC DOS. Keyboard function keys
PgUp, PgDn, End, Home, Up arrow and Down arrow are
also recognized. This was easy to do because most PC
DOS C compilers provide getch() to get a keyboard
character without echo. No fiddling of terminal modes
is needed like it is on other systems.
The IBM PC DOS port exposed a problem in findfile(),
where it was assumed that an environment variable
would not be longer than the longest filename. Turbo
C sets the latter to 80 characters, but environment
variables can be set that are almost 128 characters
long. Microsoft C 5.0 also sets it to 80, but C 5.1
sets it to 144, and C 6.0 and C and C++ 7.0 set it to
260. This has been handled by defining MAXPATHLEN at
compile time, overriding the built-in defaults.
Add support for character-at-a-time input for help
paging on VAX VMS, and for getting the screen size in
get_screen_lines().
Rename do_more_init() to kbopen(), do_more_term() to
kbclose(), and use kbget() in do_more() to conceal the
heavily-O/S dependent details of the kbxxx()
functions.
Introduce STREQUAL() macro to simplify coding.
Introduce KEY_FUNCTION_ENTRY type and apply_function()
to simplify coding, and use it in do_args(),
do_preargs(), and out_value(). Argument actions are
moved into separate functions, opt_xxx(). Rename
show_author() to opt_author(), and help() to
opt_help(). Rename do_file() to do_one_file(), and
move file loop code from main() into new do_files().
Split large body of get_simple_string() into four new
functions, get_braced_string(), get_digit_string(),
get_quoted_string(), and get_identifier_string().
Add check_inodes() to determine whether stdlog and
stdout are the same file. If so, we need to ensure
that each warning message begins a new line, without
double spacing unnecessarily when they are different
files.
Add memset() implementation for SunOS 4.1.1 CC (C++)
and BSD 4.3 UNIX because it is missing from their
run-time libraries.
Replace fopen() by macro FOPEN() to work around
erroneous fopen() prototype for SunOS 4.1.1 CC (C++).
Complete port to IBM PC DOS with Microsoft C 5.1 and
6.0 compilers. Minor source changes (the CONST macro
below) needed to work around compiler errors.
[15-Nov-1992] 2.04
Minor changes to complete successful VAX VMS
installation and test.
[15-Nov-1992] 2.03
Add match_pattern() support for consistent pattern
matching in the check_xxx() functions, using new code
defined separately in match.c.
Add support for run-time redefinition of patterns via
one or more initialization file(s) found in the PATH
(system-defined) and BIBINPUTS (user-defined) search
paths. New functions: add_pattern(),
check_patterns(), do_initfile(), do_new_pattern(),
do_single_arg(), enlarge_table(), get_line(),
strdup(), strip_comments(), and trim_value(). New C
preprocessor symbols: HAVE_OLDCODE, HAVE_PATTERNS,
HAVE_RECOMP, and HAVE_REGEXP. One of these should be
defined at compile time; if none are, then
HAVE_PATTERNS is the default.
Since options can now be specified in initialization
files, they each need negations so the command line
can override values from an initialization file.
Change all YES/NO flags to new type, YESorNO, for
better type checking.
Add do_more(), do_more_init(), and do_more_term(), for
pausing during help output; a private version of
screen paging is used instead of a pager invoked by
system() for better portability across systems. Set
SCREEN_LINES to 0 at compile time to suppress this
feature.
In fix_title(), add code to brace upper-case letters
for cases like:
"X11" -> "{X11}"
"Standard C Library" -> "Standard {C} Library"
"C++ Book" -> "{C}++ Book"
leaving
"A xxx"
unchanged.
[11-Nov-1992] 2.02
Add bad_ISBN(), bad_ISSN(), check_ISBN(), and
check_ISSN() for validation of ISBN and ISSN fields.
ISBN = "International Standard Book Number", and ISSN
= "International Standard Serial Number".
Add testisxn.bib and testisxn.bok to the test
collection, with steps in the Makefile to run the
test.
Add support for embedded \" in Scribe value strings
(forgotten in 2.01 revision); they are converted from
\"x to {\"x}.
[10-Nov-1992] 2.01
Add support for conversion of level-0 \"x to {\"x} and
x"y to x{"}y in value strings. Such input is illegal
for BibTeX, and causes hard-to-find errors, since
BibTeX raises an error at the line where it runs out
of string collection space, rather than at the
beginning of the collection point.
[06-Nov-1992] 2.00
Add full Scribe .bib file input compatibility with
-scribe command-line option.
Add support for multiple .bib file arguments on
command line, with new do_file() function to process
them.
Allow slash as well as hyphen for introducing
command-line options on VAX VMS and IBM PC DOS.
Add argument summary to help() (text extracted
verbatim from the manual pages).
Add new -delete-empty-fields, -keep-names,
-no-parbreaks, -remove-OPT-prefixes, and -no-warnings
command-line options and support code.
Add new out_with_error() and out_with_parbreak_error()
functions, and APPEND_CHAR() and EMPTY_STRING() macros
to shorten and clarify coding.
Add flush_inter_entry_space() function to standardize
line spacing.
Increase array sizes to MAX_TOKEN_SIZE (= MAX_TOKEN +
3) to reduce array bounds checking in inner loops.
Add additional file position tracking to enhance error
localization (structures IO_PAIR and POSITION, and
functions new_io_pair(), new_position(),
out_position(), and out_status()). Error messages are
parsable by GNU Emacs M-x next-error (C-x `) when
bibclean is run from Emacs by the command
M-x compile<RET>bibclean foo.bib >foo.new
Use arrays of constant strings for multiple string
output via new function out_lines(), instead of multiple
calls to fprintf().
Add additional checking via check_chapter(),
check_month(), check_number(), check_pages(),
check_volume(), check_year(), and match_regexp().
Supply implementation of memmove() library function
missing from g++ 2.2.2 library.
[03-Oct-1992] 1.06
Correct logic error in do_comma() that prevented correct
recognition of @name(key = "value") where the last
key/value pair did not have a trailing comma.
Add C++ support.
Add key_pair[] and entry_pair[] tables for
standardization of letter case usage, and use the new
NAME_PAIR type in fix_months().
Update author address.
Rename author() to show_author() to avoid shadowing
global names.
Fix two assignments of constant strings to char*
pointers.
Remove variable at_line_number which was defined, but
never used.
[01-Aug-1992] 1.05
Add -keep-initials switch support (thanks to Karl Berry
<karl@cs.umb.edu>). Internationalize telephone and FAX
numbers.
[02-Jan-1992] 1.04
Modify fix_title() to ignore macros. Modify
fix_author() to ignore author lists with parentheses
(e.g. author = "P. D. Q. Bach (113 MozartStrasse,
Vienna, Austria)").
[31-Dec-1991] 1.03
Add fix_title() to supply braces around unbraced
upper-case acronyms in titles, and add private
definition of MAX().
[15-Nov-1991] 1.02
Handle @String(...) and @Preamble(...), converting
outer parentheses to braces. Insert spaces after
author and editor initials, and normalize names to
form "P. D. Q. Bach" instead of "Bach, P. D. Q.".
[10-Oct-1991] 1.01
Increase MAX_TOKEN to match enlarged BibTeX, and add
check against STD_MAX_TOKEN.
Output ISBN and ISSN in upper case.
Always surround = by blanks in key = "value".
[19-Dec-1990] 1.00 (version number unchanged)
Install Sun386i bug fix.
[08-Oct-1990] 1.00
Original version.
***********************************************************************/
/*
 * Pattern-matching back end selection.
 *
 * At most one of HAVE_OLDCODE, HAVE_PATTERNS, HAVE_RECOMP and HAVE_REGEXP
 * may be defined at compile time.  For each one that is defined together
 * with any of the others, a line that is not valid C is exposed to the
 * compiler, so the build stops with the explanatory text in view.  When
 * none of the four is defined, HAVE_PATTERNS is selected as the default.
 */
#if defined(HAVE_REGEXP) && \
    (defined(HAVE_RECOMP) || defined(HAVE_PATTERNS) || defined(HAVE_OLDCODE))
?? Define only one of HAVE_OLDCODE, HAVE_PATTERNS, HAVE_REGEXP, and HAVE_RECOMP
#endif
#if defined(HAVE_RECOMP) && \
    (defined(HAVE_REGEXP) || defined(HAVE_PATTERNS) || defined(HAVE_OLDCODE))
?? Define only one of HAVE_OLDCODE, HAVE_PATTERNS, HAVE_REGEXP, and HAVE_RECOMP
#endif
#if defined(HAVE_PATTERNS) && \
    (defined(HAVE_RECOMP) || defined(HAVE_REGEXP) || defined(HAVE_OLDCODE))
?? Define only one of HAVE_OLDCODE, HAVE_PATTERNS, HAVE_REGEXP, and HAVE_RECOMP
#endif
#if defined(HAVE_OLDCODE) && \
    (defined(HAVE_PATTERNS) || defined(HAVE_RECOMP) || defined(HAVE_REGEXP))
?? Define only one of HAVE_OLDCODE, HAVE_PATTERNS, HAVE_REGEXP, and HAVE_RECOMP
#endif
/* Nothing chosen by the builder: fall back to HAVE_PATTERNS. */
#if !defined(HAVE_REGEXP) && !defined(HAVE_RECOMP) && \
    !defined(HAVE_PATTERNS) && !defined(HAVE_OLDCODE)
#define HAVE_PATTERNS 1
#endif
/***********************************************************************
We want this code to be compilable with C++ compilers as well as C
compilers, in order to get better compile-time checking. We therefore
must declare all function headers in both old Kernighan-and-Ritchie
style, as well as in new Standard C and C++ style. Although Standard C
also allows K&R style, C++ does not.
For functions with no argument, we just use VOID which expands to either
void, or nothing.
Older C++ compilers predefined the symbol c_plusplus, but that was
changed to __cplusplus in 1989 to conform to ISO/ANSI Standard C
conventions; we allow either.
It is regrettable that the C preprocessor language is not powerful
enough to transparently handle the generation of either style of
function declaration.
***********************************************************************/
#include "os.h"
#include "xstdlib.h"
#include "xstring.h"
#include "xctype.h"
#include "xstat.h"
#include "unixlib.h"
RCSID("$Id: bibclean.c,v 1.15 1992/11/24 15:37:18 beebe Exp beebe $")
/* $Log: bibclean.c,v $
* Revision 1.15 1992/11/24 15:37:18 beebe
* Incorporate changes for Microsoft C compiler port.
*
* Revision 1.14 1992/11/22 17:46:27 beebe
* Update for version 2.05. See internal change log for extensive list of
* changes.
*
* Revision 1.13 1992/11/15 16:54:39 beebe
* Got interrupted and forgot to update file header.
*
* Revision 1.12 1992/11/15 16:53:28 beebe
* Update for VAX VMS.
*
* Revision 1.11 1992/11/15 08:20:55 beebe
* Complete version 2.03; details are in the internal change log.
* */
/*
 * Route every use of memmove() in this file to the private Memmove()
 * implementation (see the 2.05 change log above for why).  Any macro of
 * that name coming from a system header -- at least one system defines
 * one -- is discarded first; #undef of a name that is not a macro is
 * harmless.
 */
#undef memmove
#define memmove Memmove
/*
 * NEW_STYLE is 1 when the compiler is C++ (__cplusplus or the older
 * c_plusplus is predefined) or Standard C (__STDC__ is non-zero), and 0
 * for a K&R compiler.  It is computed once with #if and defined as a
 * plain constant, so it can be used both in later #if tests and in
 * ordinary C expressions, and names the compiler does not predefine are
 * never evaluated.  A compiler that defines __STDC__ as 0 is still
 * treated as K&R.
 */
#if defined(__cplusplus) || defined(c_plusplus)
#define NEW_STYLE 1
#else
#if defined(__STDC__)
#if __STDC__
#define NEW_STYLE 1
#endif
#endif
#endif
#if !defined(NEW_STYLE)
#define NEW_STYLE 0
#endif

/*
 * VOID is the parameter list of a function that takes no arguments: void
 * for new-style compilers, nothing for K&R.
 *
 * YESorNO is the flag type: a real enumeration for better type checking
 * with new-style compilers, a plain int otherwise.
 */
#if NEW_STYLE
#define VOID void
typedef enum { NO = 0, YES = 1 } YESorNO;
#else /* K&R style */
#define VOID
#define NO 0 /* must be FALSE (zero) */
#define YES 1 /* must be TRUE (non-zero) */
typedef int YESorNO;
#endif /* NEW_STYLE */
#include "match.h" /* must come after YESorNO typedef */
/*
 * CONST stands for the const qualifier, except when M_I86 is non-zero
 * (IBM PC Microsoft C compilers), where it is left empty as a compiler
 * bug workaround.
 */
#if !M_I86
#define CONST const
#else /* M_I86 */
#define CONST
#endif /* !M_I86 */
/*
 * One row of a key/function table: a key name, the minimum length of a
 * string match for it, and the function to call when the key is matched.
 */
struct s_key_function_entry
{
    const char *name;           /* key name */
    size_t min_match;           /* minimum length string match */
    void (*function)(VOID);     /* function to call when key matched */
};
typedef struct s_key_function_entry KEY_FUNCTION_ENTRY;
/* A pair of strings: an old name and the new name that goes with it. */
struct s_name_pair
{
    const char *old_name;
    const char *new_name;
};
typedef struct s_name_pair NAME_PAIR;
/*
 * Position within one file, kept for error localization: the file name
 * together with byte, column, and line counters.
 * NOTE(review): the precise meaning of last_column_position is not
 * visible in this part of the file -- confirm against its users.
 */
struct s_position
{
    const char *filename;
    long byte_position;
    long last_column_position;
    long column_position;
    long line_number;
};
typedef struct s_position POSITION;
/* A pair of file positions: one for the input side, one for the output side. */
struct s_io_pair
{
    POSITION input;
    POSITION output;
};
typedef struct s_io_pair IO_PAIR;
/*
 * A table of match patterns.
 * NOTE(review): current_size and maximum_size presumably count the
 * entries in use and the entries allocated (cf. enlarge_table() and
 * TABLE_CHUNKS) -- confirm against the code that fills the table.
 */
struct s_pattern_table
{
    MATCH_PATTERN *patterns;
    int current_size;
    int maximum_size;
};
typedef struct s_pattern_table PATTERN_TABLE;
/* Associates a name with a pointer to its pattern table. */
struct s_pattern_names
{
    const char *name;
    PATTERN_TABLE *table;
};
typedef struct s_pattern_names PATTERN_NAMES;
#ifdef sun386
/*
 * Sun386i run-time library bug in fputs(): only first line in s is
 * written!  Write the whole string with fwrite() instead.  Note that s
 * is evaluated twice.
 */
#define fputs(s,fp) fwrite((s), 1, strlen(s), (fp))
#endif /* sun386 */
/*
 * APPEND_CHAR(s,n,c): store c at s[n] and a terminating NUL at s[n+1].
 * Every argument is parenthesized in the expansion so that expression
 * arguments (a conditional for n, pointer arithmetic for s, ...) group
 * as the caller wrote them.  s and n are each evaluated twice, so they
 * must not have side effects.
 */
#define APPEND_CHAR(s,n,c) ((s)[(n)] = (char)(c), (s)[(n)+1] = (char)'\0')
#define COMMENT_PREFIX '%' /* comment character in initialization files */
/* CTL(X): make ASCII control character by keeping the low five bits of X.
   The argument is parenthesized so that operators of lower precedence
   than & inside X are applied before the mask. */
#define CTL(X) ((X) & 037)
#define DELETE_CHAR (EOF - 1) /* magic char value for put_char() */
#define DELETE_LINE (EOF - 2) /* magic char value for put_char() */
/* EMPTY_STRING(s): make s the empty string and yield s, for use as in
   return (EMPTY_STRING(foo)).  s is evaluated twice and is parenthesized
   so that a pointer expression may be passed. */
#define EMPTY_STRING(s) ((s)[0] = (char)'\0', (s))
#define ERROR_PREFIX "??" /* this prefixes all error messages */
/* Supply the standard exit status names when the headers included so far
   have not defined them. */
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif /* EXIT_FAILURE */
#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif /* EXIT_SUCCESS */
/*
 * FOPEN(a,b) is used in place of fopen(a,b).  It is a plain call to
 * fopen(), except when __SUNCC__ is defined, where both arguments are
 * cast to char*.
 */
#undef FOPEN
#if !defined(__SUNCC__)
#define FOPEN(a,b) fopen((a),(b))
#else /* defined(__SUNCC__) */
/* bug workaround: wrong type for fopen() args with SunOS 4.1.2 CC */
#define FOPEN(a,b) fopen((char*)(a),(char*)(b))
#endif /* !defined(__SUNCC__) */
#ifndef INITFILE_EXT
#define INITFILE_EXT ".ini" /* file extension for initialization files */
#endif /* INITFILE_EXT */
/*
 * Numeric value of one ISBN digit character: 10 for 'X' or 'x', otherwise
 * the distance of c from '0'.  The result is correct only if c is a valid
 * digit; the code that uses the macro ensures that.
 */
#define ISBN_DIGIT_VALUE(c) (((c) == 'X' || (c) == 'x') ? 10 : (c) - '0')
/* ISSN digits are just like ISBN digits */
#define ISSN_DIGIT_VALUE(c) ISBN_DIGIT_VALUE(c)
#define KEY_INDENTATION 2 /* how far to indent "key = value," pairs */
#define LAST_SCREEN_LINE (-2) /* used in opt_help() and do_more() */
/* Private MAX(): drop any definition a header may have supplied, then
   define our own.  Each argument may be evaluated twice. */
#undef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/*
 * Size limits.  Every one except MAX_TOKEN_SIZE may be overridden by a
 * definition supplied at compile time.
 */
#ifndef MAX_BUFFER
/* output buffer size; this does NOT limit lengths of input lines */
#define MAX_BUFFER 8192
#endif /* MAX_BUFFER */
#ifndef MAX_COLUMN
/* length of longest entry line; non-BibTeX entry text is output verbatim */
#define MAX_COLUMN 72
#endif /* MAX_COLUMN */
#ifndef MAX_KEY_LENGTH
#define MAX_KEY_LENGTH 12 /* "howpublished" */
#endif /* MAX_KEY_LENGTH */
#ifndef MAX_LINE
#define MAX_LINE 10240 /* maximum line length in initialization file */
#endif /* MAX_LINE */
#ifndef MAX_PATTERN_NAMES
/* maximum number of key/pattern types; 100 is far more than ever likely
   to be needed, but we only waste 8 bytes each for unused entries */
#define MAX_PATTERN_NAMES 100
#endif /* MAX_PATTERN_NAMES */
#ifndef MAX_TOKEN
/* internal buffer size; no BibTeX string value may be larger than this. */
#define MAX_TOKEN 4093
#endif /* MAX_TOKEN */
/* Arrays are always dimensioned MAX_TOKEN_SIZE, so as to have space for an
   additional pair of braces and a trailing NUL, without tedious subscript
   checking in inner loops. */
#define MAX_TOKEN_SIZE (MAX_TOKEN + 3)
/* META(X): make GNU Emacs meta character by setting bit 0200 in X.  The
   argument is parenthesized so that an expression containing operators of
   lower precedence than | (such as ?:) is evaluated before the bit is set. */
#define META(X) ((X) | 0200)
#define NOOP /* dummy statement */
/*
 * PATTERN_MATCHES(string,pattern) tests a string against a pattern with
 * whichever matcher was selected at compile time: match_regexp() unless
 * HAVE_PATTERNS is defined, in which case match_pattern() is called and
 * its result is compared with YES.
 */
#if !defined(HAVE_PATTERNS)
#define PATTERN_MATCHES(string,pattern) match_regexp(string,pattern)
#else /* defined(HAVE_PATTERNS) */
#define PATTERN_MATCHES(string,pattern) (match_pattern(string,pattern) == YES)
#endif /* !defined(HAVE_PATTERNS) */
/*
 * Default SCREEN_LINES, unless set at compile time: 25 when OS_PCDOS is
 * non-zero, 24 otherwise.  Set 0 to disable pausing in out_lines().
 */
#ifndef SCREEN_LINES
#if !OS_PCDOS
#define SCREEN_LINES 24
#else /* OS_PCDOS */
#define SCREEN_LINES 25
#endif /* !OS_PCDOS */
#endif /* SCREEN_LINES */
/*
 * SKIP_NONSPACE(p) and SKIP_SPACE(p) advance the character pointer p past
 * a run of non-blank, respectively blank, characters; SKIP_NONSPACE also
 * stops at the terminating NUL.  Each expands to a while statement, so the
 * caller supplies the closing semicolon.  The character is converted to
 * unsigned char before it reaches isspace(): where char is signed, an
 * 8-bit character would otherwise be passed as a negative value, for which
 * the <ctype.h> functions are undefined.  p is parenthesized so that an
 * lvalue expression such as *pp may be passed.
 */
#define SKIP_NONSPACE(p) while (*(p) && !isspace((unsigned char)*(p))) ++(p)
#define SKIP_SPACE(p) while (isspace((unsigned char)*(p))) ++(p)
#define STD_MAX_TOKEN ((size_t)1000) /* Standard BibTeX limit */
/* STREQUAL(a,b): non-zero when strcmp() reports the two strings equal */
#define STREQUAL(a,b) (strcmp(a,b) == 0)
#define TABLE_CHUNKS 25 /* how many table entries to allocate at once */
/* TOLOWER(c): tolower() applied only when isupper(c) holds; any other c is
   returned unchanged.  c may be evaluated twice. */
#define TOLOWER(c) (isupper(c) ? tolower(c) : (c))
#define VALUE_INDENTATION (KEY_INDENTATION + MAX_KEY_LENGTH + 3)
/* where item values are output; allow space */
/* for "<key indent><key name>< = >" */
#define WARNING_PREFIX "%%" /* this prefixes all warning messages */
/*
 * Operating system-specific customizations.
 *
 * INITFILE, SYSPATH and USERPATH may each be preset at compile time; any
 * that is not gets a value from the section of the operating system in
 * effect (OS_UNIX, OS_VAXVMS or OS_PCDOS non-zero).  isoptionprefix(c)
 * tests whether c introduces a command-line option: hyphen only on UNIX,
 * hyphen or slash on VAX VMS and IBM PC DOS.
 * NOTE(review): isoptionprefix() has no fallback definition here, so it
 * is defined only when one of the three OS_xxx symbols is non-zero.
 */
#if OS_UNIX
#ifndef INITFILE
#define INITFILE ".bibcleanrc"
#endif
#ifndef SYSPATH
#define SYSPATH "PATH"
#endif
#ifndef USERPATH
#define USERPATH "BIBINPUTS"
#endif
#define isoptionprefix(c) ((c) == '-')
#endif /* OS_UNIX */

#if OS_VAXVMS
#ifndef INITFILE
#define INITFILE "bibclean.ini"
#endif
#ifndef SYSPATH
#define SYSPATH "SYS$SYSTEM"
#endif
#ifndef USERPATH
#define USERPATH "BIBINPUTS"
#endif
#define isoptionprefix(c) (((c) == '-') || ((c) == '/'))
#endif /* OS_VAXVMS */

#if OS_PCDOS
#define isoptionprefix(c) (((c) == '-') || ((c) == '/'))
#endif /* OS_PCDOS */

/* For any that are undefined, default to values suitable for OS_PCDOS. */
#ifndef INITFILE
#define INITFILE "bibclean.ini"
#endif
#ifndef SYSPATH
#define SYSPATH "PATH"
#endif
#ifndef USERPATH
#define USERPATH "BIBINPUTS"
#endif
/* All functions except main() are static to overcome limitations on
external name lengths in ISO/ANSI Standard C. Please keep them in
ALPHABETICAL order, ignoring letter case.

Forward declarations follow, one per function, so that the definitions
may appear in any order.  Parameter names carry a trailing underscore.
ARGS(( )) is not defined in this file.  NOTE(review): presumably it is
supplied by one of the headers included above, and expands to the
parenthesized parameter list for new-style compilers and to empty
parentheses for K&R compilers -- confirm. */
static void add_one_pattern ARGS((PATTERN_TABLE *pt_, const char *keyname_,
const char *pattern_, const char *msg_));
static void add_pattern ARGS((const char *keyname_, const char *pattern_,
const char *msg_));
static YESorNO apply_function ARGS((const char *key_,
KEY_FUNCTION_ENTRY table_[]));
static void bad_ISBN ARGS((char ISBN_[11]));
static void bad_ISSN ARGS((char ISSN_[9]));
static void check_chapter ARGS((void));
static void check_inodes ARGS((void));
static void check_ISBN ARGS((void));
static void check_ISSN ARGS((void));
static void check_length ARGS((size_t n_));
static void check_month ARGS((void));
static void check_number ARGS((void));
static void check_other ARGS((void));
static void check_pages ARGS((void));
static YESorNO check_patterns ARGS((PATTERN_TABLE *pt_,const char *value_));
static void check_tag ARGS((void));
static void check_volume ARGS((void));
static void check_year ARGS((void));
static void do_args ARGS((int argc_, char *argv_[]));
static void do_at ARGS((void));
static void do_BibTeX_entry ARGS((void));
static void do_BibTeX_value ARGS((void));
static void do_close_brace ARGS((void));
static void do_comma ARGS((void));
static void do_entry_name ARGS((void));
static void do_equals ARGS((void));
static void do_escapes ARGS((char *s_));
static void do_files ARGS((int argc_, char *argv_[]));
static void do_fileinit ARGS((const char *bibfilename_));
static void do_group ARGS((void));
static void do_initfile ARGS((const char *pathlist_,const char *name_));
static void do_key ARGS((void));
static YESorNO do_key_value_pair ARGS((void));
/* do_more() is declared only when help-output pausing is compiled in */
#if (SCREEN_LINES > 0)
static int do_more ARGS((FILE *fpout_, int line_, int pause_after_));
#endif /* (SCREEN_LINES > 0) */
static void do_new_pattern ARGS((char *s_));
static void do_one_file ARGS((FILE *fp_));
static void do_open_brace ARGS((void));
static void do_other ARGS((void));
static void do_preargs ARGS((int argc_, char *argv_[]));
static void do_Scribe_block_comment ARGS((void));
static void do_Scribe_close_delimiter ARGS((void));
static void do_Scribe_comment ARGS((void));
static void do_Scribe_entry ARGS((void));
static void do_Scribe_open_delimiter ARGS((void));
static void do_Scribe_separator ARGS((void));
static void do_Scribe_value ARGS((void));
static void do_single_arg ARGS((char *s_));
static void do_tag_name ARGS((void));
static void enlarge_table ARGS((PATTERN_TABLE *table_));
static void error ARGS((const char *msg_));
static void fatal ARGS((const char *msg_));
/* findfile() is the one function besides main() declared here without
static.  NOTE(review): presumably it is defined in a separate source
file -- confirm. */
char *findfile ARGS((const char *pathlist_, const char *name_));
static char *fix_author ARGS((char *author_));
static void fix_month ARGS((void));
static void fix_namelist ARGS((void));
static void fix_pages ARGS((void));
static char *fix_periods ARGS((char *author_));
static void fix_title ARGS((void));
static void flush_inter_entry_space ARGS((void));
static char *format ARGS((const char *msg_));
static char *get_braced_string ARGS((void));
static int get_char ARGS((void));
static char *get_digit_string ARGS((void));
static char *get_identifier_string ARGS((void));
static char *get_line ARGS((FILE *fp_));
static int get_next_non_blank ARGS((void));
static char *get_quoted_string ARGS((void));
/* get_screen_lines() is likewise declared only when pausing is compiled in */
#if (SCREEN_LINES > 0)
static int get_screen_lines ARGS((void));
#endif /* (SCREEN_LINES > 0) */
static char *get_Scribe_delimited_string ARGS((void));
static char *get_Scribe_identifier_string ARGS((void));
static char *get_Scribe_string ARGS((void));
static char *get_simple_string ARGS((void));
static char *get_token ARGS((char *s_, char **nextp_,
const char *terminators_));
static int isidchar ARGS((int c_));
/* iskeyvalueseparator(c): 1 when c is '=' or ':', otherwise 0 */
#define iskeyvalueseparator(c) ((c) == '=' || (c) == ':')
#if (SCREEN_LINES > 0)
/*
 * Keyboard support, compiled only when SCREEN_LINES is positive.
 *
 * KEYCODE names the key codes: an enumeration for new-style compilers,
 * an int with #define'd constants for K&R compilers.  The numeric values
 * are the same in both branches: KEY_EOF is EOF, KEY_UNKNOWN is 0, and
 * the remaining codes run from 1 to 9.
 */
#if NEW_STYLE
typedef enum key_code {
    KEY_EOF = EOF,
    KEY_UNKNOWN = 0,
    KEY_AGAIN = 1,
    KEY_DOWN = 2,
    KEY_END = 3,
    KEY_HELP = 4,
    KEY_HOME = 5,
    KEY_PGDN = 6,
    KEY_PGUP = 7,
    KEY_QUIT = 8,
    KEY_UP = 9
} KEYCODE;
#else /* K&R style */
typedef int KEYCODE;
#define KEY_EOF EOF
#define KEY_UNKNOWN 0
#define KEY_AGAIN 1
#define KEY_DOWN 2
#define KEY_END 3
#define KEY_HELP 4
#define KEY_HOME 5
#define KEY_PGDN 6
#define KEY_PGUP 7
#define KEY_QUIT 8
#define KEY_UP 9
#endif /* NEW_STYLE */

/* Table of MAX_CHAR key codes.  NOTE(review): presumably indexed by input
   character value and filled by kbinitmap() -- confirm. */
#define MAX_CHAR 256
KEYCODE keymap[MAX_CHAR];

static void kbclose ARGS((void));
static KEYCODE kbcode ARGS((void));
static int kbget ARGS((void));
static void kbinitmap ARGS((void));
static void kbopen ARGS((void));
#endif /* (SCREEN_LINES > 0) */
int main ARGS((int argc_, char *argv_[]));
#if (defined(HAVE_REGEXP) || defined(HAVE_RECOMP))
static int match_regexp ARGS((const char *string_,const char *pattern_));
#endif /* (defined(HAVE_REGEXP) || defined(HAVE_RECOMP)) */
/* NB: memmove() is a private version known as Memmove() to the compiler */
static void memmove ARGS((void *target_, const void *source_, size_t n_));
static void new_entry ARGS((void));
static void new_io_pair ARGS((IO_PAIR *pair_));
static void new_position ARGS((POSITION *position_));
static void opt_author ARGS((void));
static void opt_check_values ARGS((void));
static void opt_delete_empty_fields ARGS((void));
static void opt_error_log ARGS((void));
static void opt_file_position ARGS((void));
static void opt_fix_initials ARGS((void));
static void opt_fix_names ARGS((void));
static void opt_help ARGS((void));
static void opt_init_file ARGS((void));
static void opt_parbreaks ARGS((void));
static void opt_print_patterns ARGS((void));
static void opt_read_init_files ARGS((void));
static void opt_remove_OPT_prefixes ARGS((void));
static void opt_scribe ARGS((void));
static void opt_trace_file_opening ARGS((void));
static void opt_version ARGS((void));
static void opt_warnings ARGS((void));
#define out_c(c_) put_char(c_) /* out_c() no longer a function */
static void out_equals ARGS((void));
static void out_error ARGS((FILE *fpout_, const char *s_));
static void out_flush ARGS((void));
static void out_key ARGS((void));
static void out_lines ARGS((FILE *fpout_,const char *lines_[],
YESorNO pause_));
static void out_position ARGS((FILE *fpout_,const char *msg_,
IO_PAIR *the_location_));
static void out_s ARGS((const char *s_));
static void out_spaces ARGS((int n_));
static void out_status ARGS((FILE *fpout_, const char *prefix_));
static void out_value ARGS((void));
static void out_with_error ARGS((const char *s_,const char *msg_));
static void out_with_parbreak_error ARGS((char *s_));
static void prt_pattern ARGS((const char *keyname_, const char *pattern_,
const char *msg_));
static void put_back ARGS((int c_));
static void put_back_string ARGS((const char *s_));
static void put_char ARGS((int c_));
static void resync ARGS((void));
char *strdup ARGS((const char *s_));
int strnicmp ARGS((const char *s1_, const char *s2_, size_t n_));
static FILE *tfopen ARGS((const char *filename_, const char *mode_));
static void trim_value ARGS((void));
static void unexpected ARGS((void));
static void usage ARGS((void));
static void version ARGS((void));
static void warning ARGS((const char *msg_));
static int word_length ARGS((const char *s_));
static void wrap_line ARGS((void));
static YESorNO YESorNOarg ARGS((void));
/**********************************************************************/
/* All global variables are static to keep them local to this file,
and to overcome limitations on external name lengths in ISO/ANSI
Standard C. Please keep them in ALPHABETICAL order, ignoring letter
case. */
static int at_level = 0; /* @ nesting level */
static int brace_level = 0; /* curly brace nesting level */
static YESorNO check_values = YES; /* NO: suppress value checks */
static int close_char = EOF; /* BibTeX entry closing; may */
/* be right paren or brace */
static char current_entry_name[MAX_TOKEN_SIZE]; /* entry name */
static int current_index; /* argv[] index in do_args() */
static char current_key[MAX_TOKEN_SIZE]; /* key name */
static char *current_option; /* set by do_args() */
static char current_tag[MAX_TOKEN_SIZE]; /* citation tag */
static char current_value[MAX_TOKEN_SIZE]; /* string value */
static YESorNO delete_empty_fields = NO; /* YES: delete empty fields */
static YESorNO discard_next_comma = NO; /* YES: deleting key/value */
static YESorNO eofile = NO; /* set to YES at end-of-file */
static int error_count = 0; /* used to decide exit code */
/* normalizing names */
#if defined(DEBUG)
static FILE *fpdebug; /* for debugging */
#endif /* defined(DEBUG) */
static FILE *fpin; /* input file pointer */
/* NOTE(review): presumably the file named by -init-file; confirm in */
/* opt_init_file() */
static char *initialization_file_name;
static YESorNO is_parbreak = NO; /* get_next_non_blank() sets */
static YESorNO fix_initials = YES; /* reformat A.U. Thor? */
static YESorNO fix_names = YES; /* reformat Bach, P.D.Q? */
/* Twelve pairs of a quoted full month name and its three-letter */
/* abbreviation; check_month() compares values against the new_name */
/* member of each pair. */
static NAME_PAIR month_pair[] =
{
{"\"January\"", "jan",},
{"\"February\"", "feb",},
{"\"March\"", "mar",},
{"\"April\"", "apr",},
{"\"May\"", "may",},
{"\"June\"", "jun",},
{"\"July\"", "jul",},
{"\"August\"", "aug",},
{"\"September\"","sep",},
{"\"October\"", "oct",},
{"\"November\"", "nov",},
{"\"December\"", "dec",},
};
static char *next_option; /* set in do_args() */
static int non_white_chars = 0; /* used to test for legal @ */
static YESorNO parbreaks = YES; /* NO: parbreaks forbidden */
/* in strings and entries */
static YESorNO print_patterns = NO; /* YES: print value patterns */
static char *program_name; /* set to argv[0] */
/* Built-in pattern tables; each starts empty (no storage, sizes 0) and */
/* is filled through add_one_pattern(). */
static PATTERN_TABLE pt_chapter = { (MATCH_PATTERN*)NULL, 0, 0 };
static PATTERN_TABLE pt_month = { (MATCH_PATTERN*)NULL, 0, 0 };
static PATTERN_TABLE pt_number = { (MATCH_PATTERN*)NULL, 0, 0 };
static PATTERN_TABLE pt_pages = { (MATCH_PATTERN*)NULL, 0, 0 };
static PATTERN_TABLE pt_volume = { (MATCH_PATTERN*)NULL, 0, 0 };
static PATTERN_TABLE pt_year = { (MATCH_PATTERN*)NULL, 0, 0 };
/* Map from key name to pattern table, searched linearly by */
/* add_pattern(), check_other() and check_tag().  The list ends at the */
/* first entry whose name is NULL; add_pattern() appends new entries */
/* after the built-in ones while free slots remain. */
static PATTERN_NAMES pattern_names[MAX_PATTERN_NAMES] =
{
{"chapter", &pt_chapter},
{"month", &pt_month},
{"number", &pt_number},
{"pages", &pt_pages},
{"volume", &pt_volume},
{"year", &pt_year},
#if _AIX370
{NULL, NULL}, /* CC compiler cannot handle correct cast */
#else /* NOT _AIX370 */
{(CONST char*)NULL, (PATTERN_TABLE*)NULL}, /* entry terminator */
#endif /* _AIX370 */
/* remaining slots may be initialized at run time */
};
static YESorNO read_initialization_files = YES;/* -[no]-read-init-files sets */
static YESorNO remove_OPT_prefixes = NO; /* YES: remove OPT prefix */
static YESorNO rflag = NO; /* YES: resynchronizing */
static int screen_lines = SCREEN_LINES;/* kbopen() and out_lines() reset */
static YESorNO Scribe = NO; /* Scribe format input */
/* Scribe delimiter sets; the two strings have the same length, with */
/* each opening character at the same index as its closing partner. */
static char Scribe_open_delims[] = "{[(<'\"`";
static char Scribe_close_delims[] = "}])>'\"`";
/* In all memory models from tiny to huge, Turbo C on IBM PC DOS will
not permit more than 64KB of global constant data. Therefore, we use a
global scratch array shared between the functions fix_title(),
format(), get_Scribe_identifier_string() and
get_Scribe_delimited_string(). The code has been carefully examined
to make sure that this space is not overwritten while still in use.
Oh, the pain of the Intel segmented memory architecture! */
static char shared_string[MAX_TOKEN_SIZE];
static YESorNO show_file_position = NO; /* messages usually brief */
static FILE *stdlog; /* usually stderr */
/* Recomputed by check_inodes(); not declared static, unlike its */
/* neighbours. */
YESorNO stdlog_on_stdout = YES; /* NO for separate files */
#if OS_PCDOS
unsigned int _stklen = 0xF000; /* stack size for Turbo C */
#endif /* OS_PCDOS */
static IO_PAIR the_entry; /* used in error messages */
static IO_PAIR the_file; /* used in error messages */
static IO_PAIR the_value; /* used in error messages */
static YESorNO trace_file_opening = NO; /* -[no-]trace-file-opening sets */
static YESorNO warnings = YES; /* NO: suppress warnings */
/**********************************************************************/
#if NEW_STYLE
static void
add_one_pattern(PATTERN_TABLE *pt, const char *keyname, const char *pattern,
                const char *message)
#else /* K&R style */
static void
add_one_pattern(pt,keyname,pattern,message)
PATTERN_TABLE *pt;
const char *keyname;
const char *pattern;
const char *message;
#endif /* NEW_STYLE */
{
    /* Add one pattern/message pair to table pt, or empty the table.

       pt       table to update
       keyname  key name, used only for the prt_pattern() report
       pattern  pattern text; the empty string "" means "clear pt"
       message  optional message for the pattern; may be NULL

       The table owns private heap copies of both strings.  A pattern
       already present with the same message is ignored silently; a
       pattern already present with a different message has its
       message replaced. */

    int m;                      /* index into pt->patterns[] */
    char *copy;                 /* freshly allocated string copy */

    if (STREQUAL(pattern,""))   /* then clear pattern table */
    {
        for (m = 0; m < pt->current_size; ++m)
        {                       /* free old pattern memory */
            if (pt->patterns[m].pattern != (char*)NULL)
                free((char*)pt->patterns[m].pattern);
                                /* NB: (void*) cast fails with Sun C++ */
            if (pt->patterns[m].message != (char*)NULL)
                free((char*)pt->patterns[m].message);
            /* leave no dangling pointers behind in the emptied slots */
            pt->patterns[m].pattern = (char*)NULL;
            pt->patterns[m].message = (char*)NULL;
        }
        pt->current_size = 0;
    }
    else                        /* otherwise add new pattern */
    {
        if (pt->current_size == pt->maximum_size) /* then table full */
            enlarge_table(pt);

        for (m = 0; m < pt->current_size; ++m)
        {
            /* Make sure this is not a duplicate; if it is, and its */
            /* message is the same, then we just ignore the request. */
            /* Duplicates are possible when the user and system search */
            /* paths overlap. */
            if (STREQUAL(pattern,pt->patterns[m].pattern))
            {                   /* duplicate pattern found */
                if (((pt->patterns[m].message) != (char*)NULL)
                    && (message != (char*)NULL) &&
                    (STREQUAL(message,pt->patterns[m].message)))
                    return;     /* messages duplicate too */

                copy = (char*)NULL;
                if (message != (char*)NULL)
                {
                    copy = strdup(message);
                    if (copy == (char*)NULL)
                        fatal("Out of memory for pattern tables");
                }
                /* release the superseded message: it was allocated */
                /* here and would otherwise be leaked */
                if (pt->patterns[m].message != (char*)NULL)
                    free((char*)pt->patterns[m].message);
                pt->patterns[m].message = copy;
                prt_pattern(keyname,pattern,message);
                return;
            }
        }

        /* We have a new and distinct pattern and message, so save them */
        copy = strdup(pattern);
        if (copy == (char*)NULL)
            fatal("Out of memory for pattern tables");
        pt->patterns[pt->current_size].pattern = copy;

        copy = (char*)NULL;
        if (message != (char*)NULL)
        {
            copy = strdup(message);
            if (copy == (char*)NULL)
                fatal("Out of memory for pattern tables");
        }
        pt->patterns[pt->current_size++].message = copy;
    }
    prt_pattern(keyname,pattern,message);
}
#if NEW_STYLE
static void
add_pattern(const char *keyname, const char *pattern, const char *message)
#else /* K&R style */
static void
add_pattern(keyname,pattern,message)
const char *keyname;
const char *pattern;
const char *message;
#endif /* NEW_STYLE */
{
    /* Record pattern/message for key keyname.  The table is found by a
       case-insensitive match of the WHOLE name in pattern_names[]; if
       there is none and a free slot remains, a new empty table is
       created and registered under a private copy of keyname.  When
       pattern_names[] is full, a warning is written to stdlog and the
       pattern is ignored. */

    int k;                      /* index into pattern_names[] */
    size_t n = strlen(keyname); /* saved keyname string length */
    char *name_copy;            /* private copy of keyname */

    for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
    {                           /* find the correct pattern table */
        /* Require equal lengths as well: comparing only the first n */
        /* characters would let a mere prefix (e.g. "vol", or the */
        /* empty string) select an unrelated table. */
        if ((strlen(pattern_names[k].name) == n) &&
            (strnicmp(pattern_names[k].name,keyname,n) == 0))
        {                       /* then found the required table */
            add_one_pattern(pattern_names[k].table,keyname,pattern,message);
            return;
        }
    }

    /* If we get here, then the pattern name is not in the built-in list,
       so create a new entry in pattern_names[] if space remains; the
       last slot is reserved for the list terminator */

    if (k >= (int)(sizeof(pattern_names)/sizeof(pattern_names[0])) - 1)
    {                           /* too many pattern types */
        (void)fprintf(stdlog,
            "%s Out of memory for pattern name [%s] -- pattern ignored\n",
            WARNING_PREFIX, keyname);
    }
    else
    {                           /* sufficient table space remains */
        name_copy = strdup(keyname);
        if (name_copy == (char*)NULL)
            fatal("Out of memory for pattern tables");
        pattern_names[k].name = name_copy; /* add new table entry */
        pattern_names[k].table = (PATTERN_TABLE*)malloc(sizeof(PATTERN_TABLE));
        if (pattern_names[k].table == (PATTERN_TABLE*)NULL)
            fatal("Out of memory for pattern tables");
        pattern_names[k].table->patterns = (MATCH_PATTERN*)NULL;
        pattern_names[k].table->current_size = 0;
        pattern_names[k].table->maximum_size = 0;
        add_one_pattern(pattern_names[k].table,keyname,pattern,message);
        pattern_names[k+1].name = (char*)NULL; /* mark new end of table */
        pattern_names[k+1].table = (PATTERN_TABLE*)NULL;
    }
}
#if NEW_STYLE
static YESorNO
apply_function(const char *key, KEY_FUNCTION_ENTRY table[])
#else /* K&R style */
static YESorNO
apply_function(key,table)
const char *key;
KEY_FUNCTION_ENTRY table[];
#endif /* NEW_STYLE */
{
    /* Scan table[] (terminated by an entry with a NULL name) for the
       first entry whose leading min_match characters agree with key,
       ignoring letter case.  Call that entry's function and return
       YES; return NO when no entry agrees. */

    KEY_FUNCTION_ENTRY *entry;  /* current table entry */

    for (entry = &table[0]; entry->name != (const char*)NULL; ++entry)
    {
        if (strnicmp(key,entry->name,entry->min_match) != 0)
            continue;           /* not this one */
        entry->function();
        return (YES);
    }
    return (NO);
}
#if NEW_STYLE
static void
bad_ISBN(char ISBN[11])
#else /* K&R style */
static void
bad_ISBN(ISBN)
char ISBN[11];
#endif /* NEW_STYLE */
{
    /* Report a bad ISBN through error().  The ten collected characters
       are taken from ISBN[1..10]; slot 0 is not used.  The formatted
       text is never longer than the format itself (each %c and %%
       shrinks on output), so a buffer of sizeof(layout) suffices. */

    static char layout[] =
        "Invalid checksum for ISBN %c-%c%c%c%c%c-%c%c%c-%c in ``%%k = %%v''";
    char text[sizeof(layout)];
    const char *d = &ISBN[1];   /* d[0..9] are the ten ISBN characters */

    (void)sprintf(text, layout,
                  (int)d[0],
                  (int)d[1], (int)d[2], (int)d[3], (int)d[4], (int)d[5],
                  (int)d[6], (int)d[7], (int)d[8],
                  (int)d[9]);
    error(text);
}
#if NEW_STYLE
static void
bad_ISSN(char ISSN[9])
#else /* K&R style */
static void
bad_ISSN(ISSN)
char ISSN[9];
#endif /* NEW_STYLE */
{
    /* Report a bad ISSN through error().  The eight collected
       characters are taken from ISSN[1..8]; slot 0 is not used.  The
       formatted text is never longer than the format itself, so a
       buffer of sizeof(layout) suffices. */

    static char layout[] =
        "Invalid checksum for ISSN %c%c%c%c-%c%c%c%c in ``%%k = %%v''";
    char text[sizeof(layout)];
    const char *d = &ISSN[1];   /* d[0..7] are the eight ISSN characters */

    (void)sprintf(text, layout,
                  (int)d[0], (int)d[1], (int)d[2], (int)d[3],
                  (int)d[4], (int)d[5], (int)d[6], (int)d[7]);
    error(text);
}
static void
check_chapter(VOID)
{
    /* Validate current_value[] as a chapter value, calling unexpected()
       when it is not acceptable. */

#if defined(HAVE_OLDCODE)
    /* Hand-coded test: everything between the surrounding quotation
       marks must be a digit or a hyphen, as in "23" and "23-1". */
    size_t last = strlen(current_value) - 1; /* index of closing quote */
    size_t pos = 1;                          /* skip opening quote */

    while ((pos < last) &&
           (isdigit(current_value[pos]) || (current_value[pos] == '-')))
        pos++;
    if (pos != last)            /* stopped early on a bad character */
        unexpected();
#else /* NOT defined(HAVE_OLDCODE) */
    if (check_patterns(&pt_chapter,current_value) != YES)
        unexpected();
#endif /* defined(HAVE_OLDCODE) */
}
static void
check_inodes(VOID)
{
    /* Set stdlog_on_stdout to YES when stdlog and stdout appear to be
       the same file and to NO when they are distinct.  When this
       cannot be determined (fstat() fails, or no test exists for this
       operating system) the value stays YES, the worst-case
       assumption. */

    struct stat buflog;
    struct stat bufout;

    stdlog_on_stdout = YES;     /* assume the worst initially */

    /* A failed fstat() leaves its buffer unset, so the comparisons */
    /* below would be meaningless: keep the worst-case answer. */
    if (fstat(fileno(stdlog),&buflog) != 0)
        return;
    if (fstat(fileno(stdout),&bufout) != 0)
        return;

#if OS_UNIX
    /* Inode numbers are unique only within one device, so both the */
    /* device and the inode must agree. */
    stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
                        (buflog.st_ino == bufout.st_ino)) ? YES : NO;
#endif /* OS_UNIX */

#if OS_PCDOS
    /* No inodes, so use creation times instead */
    stdlog_on_stdout = (buflog.st_ctime == bufout.st_ctime) ? YES : NO;
#endif /* OS_PCDOS */

#if OS_VAXVMS
    /* Inode field is 3 separate values */
    stdlog_on_stdout = ((buflog.st_ino[0] == bufout.st_ino[0]) &&
                        (buflog.st_ino[1] == bufout.st_ino[1]) &&
                        (buflog.st_ino[2] == bufout.st_ino[2])) ? YES : NO;
#endif /* OS_VAXVMS */
}
static void
check_ISBN(VOID)
{
    int checksum;
    char ISBN[11];              /* saved ISBN for error messages */
                                /* (use slots 1..10 instead of 0..9) */
    int j;                      /* index for filling ISBN[] */
    int k;                      /* index into ISBN[] */
    size_t n;                   /* index into current_value[] */
    YESorNO new_ISBN;           /* YES: start new ISBN */

    /*******************************************************************
       ISBN numbers are 10-character values from the set [0-9Xx], with
       a checksum given by

            (sum(k=1:9) digit(k) * k) mod 11 == digit(10)

       where digits have their normal value, X (or x) as a digit has
       value 10, and spaces and hyphens are ignored.  The sum is
       bounded from above by 10*(1 + 2 + ... + 9) = 450, so even short
       (16-bit) integers are sufficient for the accumulation.

       We allow multiple ISBN numbers separated by arbitrary
       characters other than [0-9Xx], and check each one of them.
       Every ISBN that is incomplete or fails the checksum is reported
       through bad_ISBN().
    *******************************************************************/

    /* The scan below starts at index 1 and tests current_value[n+1], */
    /* so it needs at least two characters; shorter values hold */
    /* nothing to check. */
    if ((current_value[0] == '\0') || (current_value[1] == '\0'))
        return;

    for (checksum = 0, k = 0, new_ISBN = YES, n = 1; current_value[n+1]; ++n)
    {                           /* loop skips surrounding quotes */
        if (new_ISBN == YES)
        {
            /* Fill every slot with '?' for error messages.  ISBN[] is */
            /* used as a character array, not a string: copying an */
            /* 11-character literal into it would write a 12th byte */
            /* (the NUL) beyond the end of the array. */
            for (j = 0; j < (int)sizeof(ISBN); ++j)
                ISBN[j] = '?';
            checksum = 0;       /* new checksum starting */
            k = 0;              /* no digits collected yet */
            new_ISBN = NO;      /* initialization done */
        }
        switch (current_value[n])
        {
        case ' ':
        case '-':
            break;              /* ignore space and hyphen */

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case 'X':
        case 'x':               /* valid ISBN digit */
            k++;
            if (k < 10)
            {
                ISBN[k] = current_value[n];
                checksum += ISBN_DIGIT_VALUE(ISBN[k]) * k;
                break;
            }
            else if (k == 10)
            {
                ISBN[k] = current_value[n];
                if ((checksum % 11) != ISBN_DIGIT_VALUE(ISBN[k]))
                    bad_ISBN(ISBN);
                new_ISBN = YES;
                break;
            }
            /* k > 10: FALL THROUGH for error */

        default:                /* ignore all other characters */
            if (k > 0)          /* then only got partial ISBN */
            {
                bad_ISBN(ISBN);
                new_ISBN = YES; /* start new checksum */
            }
            break;
        }                       /* end switch (current_value[n]) */
    }                           /* end for (loop over current_value[]) */

    if ((k > 0) && (new_ISBN == NO)) /* too few digits in last ISBN */
        bad_ISBN(ISBN);
}
static void
check_ISSN(VOID)
{
    int checksum;
    char ISSN[9];               /* saved ISSN for error messages */
                                /* (use slots 1..8 instead of 0..7) */
    int j;                      /* index for filling ISSN[] */
    int k;                      /* index into ISSN[] */
    size_t n;                   /* index into current_value[] */
    YESorNO new_ISSN;           /* YES: start new ISSN */

    /*******************************************************************
       ISSN numbers are 8-character values from the set [0-9Xx], with
       a checksum given by

            (sum(k=1:7) digit(k) * (k+2)) mod 11 == digit(8)

       where digits have their normal value, X (or x) as a digit has
       value 10, and spaces and hyphens are ignored.  The sum is
       bounded from above by 10*(3 + 4 + ... + 9) = 420, so even short
       (16-bit) integers are sufficient for the accumulation.

       We allow multiple ISSN numbers separated by arbitrary
       characters other than [0-9Xx], and check each one of them.
       Every ISSN that is incomplete or fails the checksum is reported
       through bad_ISSN().
    *******************************************************************/

    /* The scan below starts at index 1 and tests current_value[n+1], */
    /* so it needs at least two characters; shorter values hold */
    /* nothing to check. */
    if ((current_value[0] == '\0') || (current_value[1] == '\0'))
        return;

    for (checksum = 0, k = 0, new_ISSN = YES, n = 1; current_value[n+1]; ++n)
    {                           /* loop skips surrounding quotes */
        if (new_ISSN == YES)
        {
            /* Fill every slot with '?' for error messages.  ISSN[] is */
            /* used as a character array, not a string: copying a */
            /* 9-character literal into it would write a 10th byte */
            /* (the NUL) beyond the end of the array. */
            for (j = 0; j < (int)sizeof(ISSN); ++j)
                ISSN[j] = '?';
            k = 0;              /* no digits collected yet */
            checksum = 0;       /* new checksum starting */
            new_ISSN = NO;      /* initialization done */
        }
        switch (current_value[n])
        {
        case ' ':
        case '-':
            break;              /* ignore space and hyphen */

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case 'X':
        case 'x':               /* valid ISSN digit */
            k++;
            if (k < 8)
            {
                ISSN[k] = current_value[n];
                checksum += ISSN_DIGIT_VALUE(ISSN[k]) * (k + 2);
                break;
            }
            else if (k == 8)
            {
                ISSN[k] = current_value[n];
                if ((checksum % 11) != ISSN_DIGIT_VALUE(ISSN[k]))
                    bad_ISSN(ISSN);
                new_ISSN = YES;
                break;
            }
            /* k > 8: FALL THROUGH for error */

        default:                /* ignore all other characters */
            if (k > 0)          /* then only got partial ISSN */
            {
                bad_ISSN(ISSN);
                new_ISSN = YES; /* start new checksum */
            }
            break;
        }                       /* end switch (current_value[n]) */
    }                           /* end for (loop over current_value[]) */

    if ((k > 0) && (new_ISSN == NO)) /* too few digits in last ISSN */
        bad_ISSN(ISSN);
}
#if NEW_STYLE
static void
check_length(size_t n)
#else /* K&R style */
static void
check_length(n)
size_t n;
#endif /* NEW_STYLE */
{
    /* Warn when a string of length n reaches STD_MAX_TOKEN; nothing is
       done when value checking is switched off. */

    if (check_values != YES)
        return;
    if (n < STD_MAX_TOKEN)
        return;
    warning("String length exceeds standard BibTeX limit for ``%k'' entry");
}
static void
check_month(VOID)
{
    /* Validate current_value[] as a month value.  An empty quoted
       string and any three-character value that matches a standard
       abbreviation in month_pair[] (ignoring case) are accepted
       silently; anything else must match the month patterns when
       pattern matching is available, otherwise unexpected() is
       called. */

    int m;                      /* month index */
    int nmonths = (int)(sizeof(month_pair)/sizeof(month_pair[0]));

    if (STREQUAL(current_value,"\"\"")) /* ignore empty values */
        return;

    if (strlen(current_value) == 3)
    {                           /* candidate standard abbreviation */
        for (m = 0; m < nmonths; ++m)
        {
            if (strnicmp(month_pair[m].new_name,current_value,3) == 0)
                return;
        }
    }

    /* Hand coding for the remaining patterns is too ugly to contemplate,
       so we only provide the checking when real pattern matching is
       available. */

#if !defined(HAVE_OLDCODE)
    if (check_patterns(&pt_month,current_value) == YES)
        return;
#endif /* !defined(HAVE_OLDCODE) */

    unexpected();
}
static void
check_number(VOID)
{
    /* Validate current_value[] as a number value, calling unexpected()
       when it is not acceptable. */

#if defined(HAVE_OLDCODE)
    int c;                                   /* character under test */
    size_t pos;                              /* index into current_value[] */
    size_t last = strlen(current_value) - 1; /* index of closing quote */

    if (STREQUAL(current_value,"\"\""))      /* ignore empty values */
        return;

    /* Between the quotation marks we accept letters, digits, white
       space, and the punctuation - + , . / # \ ( ) { }, which covers
       values like "UMIACS-TR-89-11, CS-TR-2189, SRC-TR-89-13",
       "RJ 3847 (43914)", "{STAN-CS-89-1256}",
       "UMIACS-TR-89-3.1, CS-TR-2177.1", "TR\#89-24", "23", "23-27",
       and "3+4". */

    for (pos = 1; pos < last; ++pos)
    {
        c = current_value[pos];
        if (isalnum(c) || isspace(c))
            continue;
        if ((c == '-') || (c == '+') || (c == ',') || (c == '.') ||
            (c == '/') || (c == '#') || (c == '\\') ||
            (c == '(') || (c == ')') || (c == '{') || (c == '}'))
            continue;
        break;                  /* character outside the accepted set */
    }
    if (pos != last)
        unexpected();
#else /* NOT defined(HAVE_OLDCODE) */
    if (check_patterns(&pt_number,current_value) != YES)
        unexpected();
#endif /* defined(HAVE_OLDCODE) */
}
static void
check_other(VOID)
{
    /* Check current_value[] against the pattern table registered under
       the name current_key[] (letter case ignored), calling
       unexpected() when no pattern of that table matches.  Keys with
       no table of their own are not checked. */

    int k;                      /* index into pattern_names[] */
    size_t n = strlen(current_key);

    for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
    {
        /* Require equal lengths as well: comparing only the first n */
        /* characters would let a key that is merely a prefix of a */
        /* table name (or an empty key) select an unrelated table. */
        if ((strlen(pattern_names[k].name) == n) &&
            (strnicmp(pattern_names[k].name,current_key,n) == 0))
        {                       /* then found the required table */
            if (check_patterns(pattern_names[k].table,current_value) == NO)
                unexpected();
            return;
        }
    }
}
static void
check_pages(VOID)
{
    /* Validate current_value[] as a pages value.

       Need to handle "B721--B729" as well as "721--729"; some
       physics journals use an initial letter in page number.
       (The pattern-driven branch can do so; the hand-coded
       HAVE_OLDCODE branch below accepts digits, hyphens and commas
       only.) */

#if defined(HAVE_OLDCODE)
    int number = 1;             /* count of page numbers seen so far */
    size_t k;
    size_t n = strlen(current_value) - 1;

    if (STREQUAL(current_value,"\"\"")) /* ignore empty values */
        return;

    /* We expect the value string to match the regexps [0-9]+ or
       [0-9]+--[0-9]+ */

    for (k = 1; k < n; ++k)
    {   /* omit first and last characters -- they are quotation marks */
        switch (current_value[k])
        {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            if (number > 2)
            {
                warning("More than 2 page numbers in ``%k = %v''");
                return;
            }
            break;

        case '-':
            number++;
            if (current_value[k+1] != '-') /* expect -- */
            {
                warning(
                    "Use en-dash, --, to separate page numbers in ``%k = %v''");
                return;
            }
            ++k;
            if (current_value[k+1] == '-') /* should not have --- */
            {
                warning(
                    "Use en-dash, --, to separate page numbers in ``%k = %v''");
                return;
            }
            break;

        case ',':
            number++;
            break;

        default:
            unexpected();
            return;
        }
    }
    /* Every character was acceptable, so the value is fine.  Without */
    /* this return, control would reach unexpected() and flag every */
    /* valid value. */
    return;
#else /* NOT defined(HAVE_OLDCODE) */
    if (check_patterns(&pt_pages,current_value) == YES)
        return;
    unexpected();
#endif /* defined(HAVE_OLDCODE) */
}
#if (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) || defined(HAVE_RECOMP))
#if NEW_STYLE
static YESorNO
check_patterns(PATTERN_TABLE* pt,const char *value)
#else /* K&R style */
static YESorNO
check_patterns(pt,value)
PATTERN_TABLE* pt;
const char *value;
#endif /* NEW_STYLE */
{
    /* Try value against each pattern of table pt, in order.  Return
       YES for an empty table or at the first pattern that matches,
       and NO when the table has patterns but none matches.  A message
       attached to the matching pattern is issued before returning:
       through error() without its first character when it begins
       with '?', otherwise through warning(). */

    int k;                      /* index into pt->patterns[] */
    const char *note;           /* message of the matching pattern */

    if (pt->current_size == 0)  /* nothing to test against */
        return (YES);

    for (k = 0; k < pt->current_size; ++k)
    {
        if (!(PATTERN_MATCHES(value,pt->patterns[k].pattern)))
            continue;

        note = pt->patterns[k].message;
        if (note != (const char*)NULL)
        {
            if (note[0] == '?') /* special error flag */
                error(note + 1);
            else                /* just normal warning */
                warning(note);
        }
        return (YES);
    }
    return (NO);
}
#endif /* (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) ||
          defined(HAVE_RECOMP)) */
static void
check_tag(VOID)
{
    /* Look for a pattern table whose name begins with the text of
       current_tag[] (letter case ignored); when one is found, match
       the tag against that table's patterns and warn if none fits.

       NOTE(review): the table is selected by the tag text itself, and
       only the first strlen(current_tag) characters are compared, so
       an empty tag selects the first table.  Whether that is intended
       cannot be told from here -- confirm against the callers. */

    size_t taglen = strlen(current_tag);
    int k = 0;                  /* index into pattern_names[] */

    while (pattern_names[k].name != (const char*)NULL)
    {
        if (strnicmp(pattern_names[k].name,current_tag,taglen) == 0)
        {                       /* then found the required table */
            if (check_patterns(pattern_names[k].table,current_tag) == NO)
                warning("Unexpected citation tag ``%t''");
            return;
        }
        ++k;
    }
}
static void
check_volume(VOID)
{
    /* Validate current_value[] as a volume value, calling unexpected()
       when it is not acceptable. */

#if defined(HAVE_OLDCODE)
    size_t k;
    size_t n = strlen(current_value) - 1;

    if (STREQUAL(current_value,"\"\"")) /* ignore empty values */
        return;

    /* Match patterns like "27", "27A", "27/3", "27A 3", "SMC-13", "VIII",
       "B", "{IX}", "1.2", "Special issue A", and "11 and 12".  However,
       NEVER match pattern like "11(5)", since that is probably an erroneous
       incorporation of issue number into the volume value. */

    for (k = 1; k < n; ++k)
    {   /* omit first and last characters -- they are quotation marks */
        if (!(     isalnum(current_value[k])
                || (current_value[k] == '-')
                || (current_value[k] == '/')
                || (current_value[k] == '.')
                || (current_value[k] == ' ')
                || (current_value[k] == '{')
                || (current_value[k] == '}') ))
        {
            unexpected();
            return;
        }
    }
    /* Every character was acceptable, so the value is fine.  Without */
    /* this return, control would reach unexpected() and flag every */
    /* valid value. */
    return;
#else /* NOT defined(HAVE_OLDCODE) */
    if (check_patterns(&pt_volume,current_value) == YES)
        return;
    unexpected();
#endif /* defined(HAVE_OLDCODE) */
}
static void
check_year(VOID)
{
    /* Validate current_value[] as a year value.  After the syntax
       check (hand-coded or pattern-driven, depending on HAVE_OLDCODE),
       every run of digits in the value is converted to a number, and
       a warning is issued for each one outside 1800..2099.  Note that
       in the pattern-driven build a successful pattern match returns
       at once, so the range test runs only after unexpected(). */

    char *p;                    /* scan position in current_value[] */
    char *q;                    /* end of digit string, from strtol() */
    long year;

#if defined(HAVE_OLDCODE)
    size_t k;
    size_t n;

    if (STREQUAL(current_value,"\"\"")) /* ignore empty values */
        return;

    /* We expect the value string to match the regexp [0-9]+ */

    n = strlen(current_value) - 1;
    for (k = 1; k < n; ++k)
    {   /* omit first and last characters -- they are quotation marks */
        if (isdigit(current_value[k]))
            continue;
        warning("Non-digit found in value field of ``%k = %v''");
        return;
    }
#else /* NOT defined(HAVE_OLDCODE) */
    if (check_patterns(&pt_year,current_value) == YES)
        return;
    unexpected();
#endif /* defined(HAVE_OLDCODE) */

    p = current_value;
    while (*p)                  /* now validate all digit strings */
    {
        if (!isdigit(*p))       /* ignore other characters */
        {
            p++;
            continue;
        }
        /* have digit string: make sure year is `reasonable' */
        year = strtol(p,&q,10);
        if ((year < 1800L) || (year > 2099L))
            warning("Suspicious year in ``%k = %v''");
        p = q;                  /* resume after the digit string */
    }
}
#if NEW_STYLE
static void
do_args(int argc, char *argv[])
#else /* K&R style */
static void
do_args(argc,argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    /* Process the command line.  Each argument that starts with an
       option prefix and has at least one more character is looked up
       in options[] (abbreviations down to min_match characters are
       accepted) and its handler is called; an unknown option prints
       the usage message and exits with EXIT_FAILURE.  All other
       arguments are file names: they are moved down to argv[1..],
       and the shortened list is terminated by a NULL pointer. */

    int k;                      /* index into argv[] */
    int nfiles;                 /* next free file-name slot in argv[] */

    static KEY_FUNCTION_ENTRY options[] =
    {
        {"?",                      1, opt_help},
        {"author",                 1, opt_author},
        {"check-values",           1, opt_check_values},
        {"delete-empty-fields",    1, opt_delete_empty_fields},
        {"error-log",              1, opt_error_log},
        {"file-position",          3, opt_file_position},
        {"fix-initials",           5, opt_fix_initials},
        {"fix-names",              5, opt_fix_names},
        {"help",                   1, opt_help},
        {"init-file",              1, opt_init_file},
        {"no-check-values",        4, opt_check_values},
        {"no-delete-empty-fields", 4, opt_delete_empty_fields},
        {"no-file-position",       6, opt_file_position},
        {"no-fix-initials",        8, opt_fix_initials},
        {"no-fix-names",           8, opt_fix_names},
        {"no-parbreaks",           5, opt_parbreaks},
        {"no-print-patterns",      5, opt_print_patterns},
        {"no-read-init-files",     6, opt_read_init_files},
        {"no-remove-OPT-prefixes", 6, opt_remove_OPT_prefixes},
        {"no-scribe",              4, opt_scribe},
        {"no-trace-file-opening",  4, opt_trace_file_opening},
        {"no-warnings",            4, opt_warnings},
        {"parbreaks",              2, opt_parbreaks},
        {"print-patterns",         2, opt_print_patterns},
        {"read-init-files",        3, opt_read_init_files},
        {"remove-OPT-prefixes",    3, opt_remove_OPT_prefixes},
        {"scribe",                 1, opt_scribe},
        {"trace-file-opening",     1, opt_trace_file_opening},
        {"version",                1, opt_version},
        {"warnings",               1, opt_warnings},
        {(const char*)NULL,        0, (void (*)(VOID))NULL},
    };

    for (nfiles = 1, k = 1; k < argc; ++k)
    {
        /* Test the first character before looking at the second: for */
        /* an empty argument "" the second character lies beyond the */
        /* end of the string. */
        if ( isoptionprefix(argv[k][0]) && (argv[k][1] != '\0') )
        {                       /* then process command-line switch */
            current_index = k;  /* needed by opt_init_file() and */
            next_option = argv[k+1]; /* opt_error_log() */
            current_option = argv[k]; /* needed by YESorNOarg() */
            if (apply_function(current_option+1,options) == NO)
            {
                usage();
                exit(EXIT_FAILURE);
            }
            k = current_index;  /* some opt_xxx() functions update it */
        }
        else                    /* save file names */
            argv[nfiles++] = argv[k]; /* shuffle file names down */
    }
    argv[nfiles] = (char*)NULL; /* terminate new argument list */
}
static void
do_at(VOID)                     /* parse @name{...} */
{
    /* Read the next non-blank character, expecting the @ that opens an
       entry, and record the current file position in the_entry.  An @
       that is the first non-blank character on its line is output and
       raises at_level; a non-zero brace level at that point is
       reported and reset to zero.  Any other character is output and
       reported as an error.  End of file is ignored here. */

    int c = get_next_non_blank();

    the_entry = the_file;
    if (c == EOF)
        return;

    if ((c == '@') && (non_white_chars == 1))
    {
        at_level++;
        out_c(c);
        if (brace_level == 0)
            return;
        error(
            "@ begins line, but brace level is not zero after entry ``@%e{%t,''");
        brace_level = 0;
        return;
    }

    out_c(c);
    out_with_error("", "Expected @name{...} after entry ``@%e{%t,''");
}
static void
do_BibTeX_entry(VOID)
{
/*************************************************************
Parse a BibTeX entry, one of:
@entry-name{tag,key=value,key=value,...,}
@string{name=value}
@preamble{...}
*************************************************************/
new_entry();
do_at();
if ((rflag == YES) || (eofile == YES)) return;
do_entry_name();
if (rflag == YES) return;
if (STREQUAL(current_entry_name,"Preamble"))
do_group();
else if (STREQUAL(current_entry_name,"String"))
do_group();
else /* expect @name{tag, key = value, ... } */
{
do_open_brace();
if (rflag == YES) return;
do_tag_name();
if (rflag == YES) return;
do_comma();
if (rflag == YES) return;
while (do_key_value_pair() == YES)
NOOP;
if (rflag == YES) return;
do_close_brace();
}
flush_inter_entry_space();
}
/***********************************************************************
BibTeX value fields can take several forms, as illustrated by this
simple BNF grammar:

BibTeX-value-string:
        simple-string |
        simple-string # BibTeX-value-string

simple-string:
        "quoted string" |
        {braced-string} |
        digit-sequence |
        alpha-sequence
***********************************************************************/

static void
do_BibTeX_value(VOID)           /* process BibTeX value string */
{
    /* Collect a complete value -- one or more simple strings joined by
       the # concatenation operator -- into current_value[], with each
       # rewritten as " # ", and then output it with out_value().  The
       starting file position is saved in the_value for messages.  A
       value that would not fit in MAX_TOKEN characters, or (when
       paragraph breaks are forbidden) one that contains a paragraph
       break, is output with an error message instead. */

    int c;                      /* character following a simple string */
    size_t k;                   /* characters stored in current_value[] */
    char *s;                    /* current simple string */
    static char empty_string[] = "";
    size_t n;                   /* length of s */

    the_value = the_file;

    s = get_simple_string();
    for (k = 0; *s; )
    {
        n = strlen(s);
        if ((k + n) >= MAX_TOKEN)
        {
            current_value[k] = (char)'\0';
            out_s(current_value);
            out_with_error(s, "Value too long for key ``%k''");
            return;
        }
        (void)strcpy(&current_value[k],s); /* append this piece */
        k += n;
        c = get_next_non_blank();
        if ((parbreaks == NO) && (is_parbreak == YES))
        {
            APPEND_CHAR(current_value,k,c);
            out_with_parbreak_error(current_value);
            return;
        }
        if (c == '#')           /* concatenation: another piece follows */
        {
            if ((k + 3) >= MAX_TOKEN)
            {
                current_value[k] = (char)'\0';
                out_s(current_value);
                out_with_error(" # ", "Value too long for key ``%k''");
                return;
            }
            current_value[k++] = (char)' ';
            current_value[k++] = (char)'#';
            current_value[k++] = (char)' ';
            s = get_simple_string();
        }
        else                    /* end of string */
        {
            put_back(c);
            s = empty_string;   /* empty string ends the loop */
        }
    }
    out_value();
}
static void
do_close_brace(VOID) /* parse level 1 closing brace or parenthesis */
{
int c;
c = get_next_non_blank();
if (c == EOF)
return;
else if (c == close_char)
{
if (c == ')')
brace_level--; /* get_char() could not do this for us */
out_c('}'); /* standardize parenthesis to brace */
if (brace_level != 0)
out_with_error("",
"Non-zero brace level after @name{...} processed. Last tag = ``%t''");
}
else /* raise error and try to resynchronize */
{
out_c(c);
out_with_error("",
"Expected closing brace or parenthesis in entry ``@%e{%t,''");
}
}
static void
do_comma(VOID)
{
int c;
/* Parse a comma, or an optional comma before a closing brace or
parenthesis; an omitted legal comma is supplied explicitly.
A newline is output after the comma so that key = value
pairs appear on separate lines. */
the_value = the_file;
c = get_next_non_blank();
if (c == EOF)
NOOP;
else if (c == ',')
{
if (discard_next_comma == NO)
out_s(",\n");
}
else if (c == close_char)
{ /* supply missing comma for last key = value pair*/
if (c == ')')
brace_level--; /* get_char() could not do this for us */
if (brace_level == 0) /* reached end of bibliography entry */
{
if (c == ')')
brace_level++; /* put_back() could not do this for us */
put_back(c);
if (discard_next_comma == NO)
out_s(",\n");
}
else /* no comma, and still in bibliography entry */
{
out_c(c);
out_with_error("","Non-zero brace level after @name{...} \
processed. Last entry = ``@%e{%t,''");
}
}
else /* raise error and try to resynchronize */
{
out_c(c);
out_with_error("", "Expected comma after last key ``%k''");
}
discard_next_comma = NO;
}
static void
do_entry_name(VOID)             /* process BibTeX entry name */
{
    /* Collect the entry name that follows @ into current_entry_name[],
       forcing the first letter to upper case and the rest to lower
       case, then respell a few names in mixed case and output the
       result.  Collection stops at end of file or at the first
       character that is not an identifier character, which is pushed
       back.  A forbidden paragraph break or an over-long name is
       output with an error message instead. */

    int c;
    size_t k;                   /* characters stored so far */
    int n;                      /* index into entry_pair[] */
    static NAME_PAIR entry_pair[] =
    {                           /* entry name case change table */
        { "Deathesis",          "DEAthesis" },
        { "Inbook",             "InBook" },
        { "Incollection",       "InCollection" },
        { "Inproceedings",      "InProceedings" },
        { "Mastersthesis",      "MastersThesis" },
        { "Phdthesis",          "PhdThesis" },
        { "Techreport",         "TechReport" },
    };

    k = 0;
    for (;;)
    {
        c = get_next_non_blank();
        if ((c == EOF) || !isidchar(c))
            break;              /* end of the name */

        if (k == 0)
        {                       /* first character: capitalize */
            if (!isalpha(c))
                error("Non-alphabetic character begins an entry name");
            if (islower(c))
                c = toupper(c);
        }
        else if (isupper(c))    /* later characters: lower case */
            c = tolower(c);

        if ((parbreaks == NO) && (is_parbreak == YES))
        {
            APPEND_CHAR(current_entry_name,k,c);
            out_with_parbreak_error(current_entry_name);
            return;
        }
        if (k >= MAX_TOKEN)
        {
            APPEND_CHAR(current_entry_name,k,c);
            out_with_error(current_entry_name, "@entry_name too long");
            return;
        }
        current_entry_name[k] = (char)c;
        ++k;
    }
    current_entry_name[k] = (char)'\0';
    if (c != EOF)
        put_back(c);

    /* Substitute a few keywords that look better in mixed case */
    n = 0;
    while (n < (int)(sizeof(entry_pair)/sizeof(entry_pair[0])))
    {
        if (STREQUAL(current_entry_name,entry_pair[n].old_name))
            (void)strcpy(current_entry_name,entry_pair[n].new_name);
        ++n;
    }

    out_s(current_entry_name);
    check_length(k);
}
static void
do_equals(VOID)                 /* process = in key = value */
{
    /* Expect the = between a key and its value.  An equals sign is
       output through out_equals(); any other character is output and
       reported as an error; end of file produces nothing.  In every
       case the output is then padded with blanks out to the
       VALUE_INDENTATION column. */

    int c;

    the_value = the_file;

    c = get_next_non_blank();
    if (c == '=')
        out_equals();
    else if (c != EOF)
    {
        out_c(c);
        out_with_error("", "Expected \"=\" after key ``%k''");
    }

    /* supply leading indentation */
    out_spaces((int)(VALUE_INDENTATION - the_file.output.column_position));
}
/*
 * Reduce backslash escape sequences in s[] in place.
 *
 * Recognized forms:
 *	\a \b \f \n \r \t \v	the corresponding control character
 *	\0x<hex-digits>		character with that hexadecimal value
 *	\<octal-digits>		character with that octal value
 *	\<other>		the character <other> itself
 *
 * Numeric escapes are converted with strtol() and truncated to a char.
 * The result is never longer than the input, so the conversion is done
 * in the same buffer.  A NULL argument is ignored.
 *
 * A lone backslash at the very end of the string is dropped; stepping
 * over it would otherwise carry the scan past the terminating NUL.
 */
#if NEW_STYLE
static void
do_escapes(char *s)
#else /* K&R style */
static void
do_escapes(s)
char *s;
#endif /* NEW_STYLE */
{					/* reduce escape sequences in s[] */
    int base;				/* number base for strtol() */
    char *endptr;			/* pointer returned by strtol() */
    char *p;				/* pointer into output s[] */

    if (s == (char*)NULL)		/* nothing to do if no string */
	return;

    for (p = s ; *s ; ++s)
    {
	if (*s != '\\')			/* not escaped, so just copy it */
	{
	    *p++ = *s;
	    continue;
	}

	if (s[1] == '\0')		/* dangling backslash: nothing left */
	    break;			/* to escape, so stop before the NUL */

	base = 8;			/* base is tentatively octal */
	switch (*++s)
	{
	case 'a':	*p++ = CTL('G'); break;
	case 'b':	*p++ = CTL('H'); break;
	case 'f':	*p++ = CTL('L'); break;
	case 'n':	*p++ = CTL('J'); break;
	case 'r':	*p++ = CTL('M'); break;
	case 't':	*p++ = CTL('I'); break;
	case 'v':	*p++ = CTL('K'); break;
	case '0':
	    if (TOLOWER(s[1]) == 'x')	/* 0x means hexadecimal */
		base = 16;
	    /* FALL THROUGH */
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	    *p++ = (char)strtol((const char*)s,&endptr,base);
	    s = endptr - 1;		/* point to last used character */
	    break;
	default:			/* \x becomes x for all other x */
	    *p++ = *s;
	    break;
	}
    }
    *p = '\0';				/* terminate final string */
}
/*
 * Given the name of a bibliography file, derive the name of its private
 * initialization file by replacing everything from the last `.' onward
 * with INITFILE_EXT, and process that file with do_initfile().
 *
 * Nothing happens when the name contains no `.', or when memory for the
 * derived name cannot be obtained.
 */
#if NEW_STYLE
static void
do_fileinit(const char *bibfilename)	/* process one initialization file */
#else /* K&R style */
static void
do_fileinit(bibfilename)		/* process one initialization file */
const char *bibfilename;
#endif /* NEW_STYLE */
{
    const char *dot;			/* last `.' in bibfilename */
    char *ininame;			/* derived init file name */

    dot = strrchr(bibfilename,'.');
    if (dot == (char*)NULL)		/* no file extension to replace */
	return;

    ininame = (char*)malloc(strlen(bibfilename) + sizeof(INITFILE_EXT) + 1);
    if (ininame == (char*)NULL)
	return;

    /* convert foo.bib to foo.ini and then process it as an init file */
    (void)strcpy(ininame,bibfilename);
    (void)strcpy(ininame + (dot - bibfilename),INITFILE_EXT);
    do_initfile((char*)NULL,ininame);
    free(ininame);
}
/*
 * Process every input file named in argv[1..], or stdin when no file
 * names remain.  A name of "-" also selects stdin.  Files that cannot
 * be opened are reported on stdlog and skipped.  When
 * read_initialization_files is YES, the per-file initialization file
 * is processed before each named bibliography file.
 */
#if NEW_STYLE
static void
do_files(int argc, char *argv[])
#else /* K&R style */
static void
do_files(argc,argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    FILE *fp;
    int k = argc;			/* index into argv[] */
		/* set to argc to remove optimizer complaints about
		   unused argument */
    char *name;				/* current argv[k] */

    if (argv[1] == (char*)NULL)		/* no files specified, so use stdin */
    {
	the_file.input.filename = "stdin";
	do_one_file(stdin);
	return;
    }

    /* otherwise use command-line files left in argv[] */
    for (k = 1; (name = argv[k]) != (char*)NULL; ++k)
    {
	if (STREQUAL(name,"-"))
	{
	    /* "-" is the conventional UNIX synonym for stdin.  It is
	       handed to do_one_file() but never closed, so that later
	       read attempts fail silently and harmlessly at
	       end-of-file. */
	    the_file.input.filename = "stdin";
	    do_one_file(stdin);
	    continue;
	}

	fp = tfopen(name, "r");
	if (fp == (FILE*)NULL)
	{
	    (void)fprintf(stdlog,
		"\n%s Ignoring open failure on file [%s]\n",
		ERROR_PREFIX, name);
	    continue;
	}

	/* open succeeded, so process file */
	if (k > 1)			/* supply blank line between */
	    out_c('\n');		/* entries at file boundaries */
	the_file.input.filename = name;
	if (read_initialization_files == YES)
	    do_fileinit(the_file.input.filename);
	do_one_file(fp);
	(void)fclose(fp);		/* close to save file resources */
    }
}
/*
 * Copy a braced (or parenthesized) group from input to output without
 * reformatting.  The opening delimiter is handled by do_open_brace();
 * copying stops at end of file or once brace_level has dropped to zero.
 * A closing parenthesis that ends an @keyword(...) group is written as
 * a right brace.
 */
static void
do_group(VOID)				/* copy a braced group verbatim */
{
    int ch;

    do_open_brace();
    if (rflag == YES)
	return;

    for (;;)
    {
	ch = get_char();
	if (ch == EOF)
	    break;

	if ((ch == close_char) && (close_char == ')') && (brace_level == 1))
	{				/* end of @keyword(...) */
	    brace_level = 0;
	    ch = '}';
	}

	/* an @ as the first non-blank on a line suggests a new entry */
	if ((ch == '@') && (non_white_chars == 1))
	    error("@ begins line, but brace level is not zero after \
entry ``@%e{%t,''");

	out_c(ch);
	if (brace_level == 0)
	    break;
    }
}
/*
 * Locate the initialization file `name' with findfile(pathlist,name)
 * and process it line by line.  A line whose first non-blank character
 * is an option prefix is treated as a command-line option; any other
 * line is treated as a pattern definition.  The located file name is
 * left in the global initialization_file_name.
 *
 * A file that cannot be found or opened is silently ignored.
 */
#if NEW_STYLE
static void
do_initfile(const char *pathlist, const char *name)
#else /* K&R style */
static void
do_initfile(pathlist,name)
const char *pathlist;
const char *name;
#endif /* NEW_STYLE */
{
    FILE *fp;
    char *line;				/* current logical line */

    initialization_file_name = findfile(pathlist,name);
    if (initialization_file_name == (char*)NULL)
	return;				/* silently ignore missing files */

    fp = tfopen(initialization_file_name,"r");
    if (fp == (FILE*)NULL)
	return;				/* silently ignore missing files */

    for (line = get_line(fp); line != (char *)NULL; line = get_line(fp))
    {					/* process init file lines */
	SKIP_SPACE(line);
	if (isoptionprefix(*line))
	    do_single_arg(line);	/* then expect -option [value] */
	else
	    do_new_pattern(line);	/* else expect key = "value" */
    }
    (void)fclose(fp);
}
/*
 * Collect the next key name into current_key[] in lower case, then
 * restore the preferred upper-case spelling of a few acronyms and of a
 * leading "opt" prefix, and write the key with out_key() if it is not
 * empty.  The character that ends the key is pushed back onto the
 * input.
 */
static void
do_key(VOID)				/* process BibTeX key name */
{
    static NAME_PAIR preferred_case[] =
    {					/* keyword case change table */
	{ "ansi-standard-number", "ANSI-standard-number" },
	{ "ieee-standard-number", "IEEE-standard-number" },
	{ "isbn", "ISBN" },
	{ "iso-standard-number", "ISO-standard-number" },
	{ "issn", "ISSN" },
	{ "lccn", "LCCN" },
    };
    int ch;				/* current input character */
    size_t len = 0;			/* characters stored so far */
    int i;				/* index into preferred_case[] */

    the_value = the_file;		/* snapshot position for messages */

    ch = get_next_non_blank();
    while ((ch != EOF) && isidchar(ch))
    {
	if (len >= MAX_TOKEN)
	{
	    APPEND_CHAR(current_key,len,ch);
	    out_with_error(current_key, "Entry keyword too long");
	    return;
	}
	if ((len == 0) && !isalpha(ch))
	    error("Non-alphabetic character begins a keyword");
	current_key[len] = (char)(isupper(ch) ? tolower(ch) : ch);

	ch = get_char();
	len++;
    }
    if (ch != EOF)
	put_back(ch);			/* terminator belongs to next token */
    current_key[len] = (char)'\0';

    /* Substitute a few keywords that look better in upper case */
    for (i = 0; i < (int)(sizeof(preferred_case)/sizeof(preferred_case[0]));
	 ++i)
    {
	if (STREQUAL(current_key,preferred_case[i].old_name))
	    (void)strcpy(current_key,preferred_case[i].new_name);
    }

    /* GNU Emacs bibtex.el expects OPT; only the three prefix characters
       are overwritten, so no terminating NUL is wanted here */
    if (strncmp("opt",current_key,3) == 0)
	(void)strncpy(current_key,"OPT",3);

    if (len > 0)
	out_key();
    check_length(len);
}
/*
 * Process one `key = value' pair followed by its comma.
 *
 * Returns YES when a complete pair was handled and another may follow.
 * Returns NO when end of file has been reached, when rflag has been
 * raised by one of the steps, or when no key was found.
 */
static YESorNO
do_key_value_pair(VOID)			/* process key = value pair */
{
    if (eofile == YES)
	return (NO);

    /* step 1: the key */
    do_key();
    if ((rflag == YES) || (eofile == YES) || (current_key[0] == '\0'))
	return (NO);

    /* step 2: the separator between key and value */
    if (Scribe == YES)
	do_Scribe_separator();
    else
	do_equals();
    if ((rflag == YES) || (eofile == YES))
	return (NO);

    /* step 3: the value */
    if (Scribe == YES)
	do_Scribe_value();
    else
	do_BibTeX_value();
    if ((rflag == YES) || (eofile == YES))
	return (NO);

    /* step 4: the comma; this supplies any missing optional comma */
    do_comma();
    if ((rflag == YES) || (eofile == YES))
	return (NO);

    return (YES);
}
#if (SCREEN_LINES > 0)
/*
 * Pager prompt: show the key summary on fpout, then wait for a key
 * from kbcode() and translate it into the value returned to the
 * caller.  The result is a line number computed from line_number and
 * pause_after, or 0 for the top, LAST_SCREEN_LINE for the end, or EOF
 * when the user quits.  The help key redisplays the summary and any
 * unrecognized key produces a beep; both then wait for another key.
 */
#if NEW_STYLE
static int
do_more(FILE *fpout,int line_number, int pause_after)
#else /* K&R style */
static int
do_more(fpout, line_number, pause_after)
FILE *fpout;
int line_number;
int pause_after;
#endif /* NEW_STYLE */
{
#if OS_PCDOS
#define MORE_HELP \
"More? f)orward b)ackward e)nd q)uit r)efresh t)op \030 \031 PgUp PgDn Home \
End\n\r"
#else /* NOT OS_PCDOS */
#define MORE_HELP \
"More? f)orward b)ackward d)own e)nd q)uit r)efresh t)op u)p\n\r"
#endif /* OS_PCDOS */

    int next_line;			/* value handed back to the caller */

    (void)fputs(MORE_HELP,fpout);
    (void)fflush(fpout);		/* make screen up-to-date */

    for (;;)			/* loop until a valid input code is received */
    {
	switch (kbcode())
	{
	case KEY_PGUP:			/* backward screen */
	    next_line = MAX(0,line_number + 1 - 2*pause_after);
	    break;

	case KEY_DOWN:		/* go down 1 line (scroll up 1 line) */
	    next_line = line_number + 2 - pause_after;
	    break;

	case KEY_END:			/* end */
	    next_line = LAST_SCREEN_LINE;
	    break;

	case KEY_PGDN:			/* forward screen */
	    next_line = line_number + 1;
	    break;

	case KEY_HELP:			/* show the summary again */
	    (void)fputs(MORE_HELP,fpout);
	    continue;

	case KEY_EOF:
	case KEY_QUIT:
	    next_line = EOF;
	    break;

	case KEY_AGAIN:			/* refresh */
	    next_line = MAX(0,line_number + 1 - pause_after);
	    break;

	case KEY_HOME:			/* top */
	    next_line = 0;
	    break;

	case KEY_UP:		/* go up 1 line (scroll down 1 line) */
	    next_line = line_number + 0 - pause_after;
	    break;

	case KEY_UNKNOWN:
	default:			/* anything else produces */
	    fputc('\007',fpout);	/* an error beep */
	    continue;
	}				/* end switch (kbcode()) */

	return (next_line);
    }					/* end for (;;) */
}
#endif /* (SCREEN_LINES > 0) */
/*
 * Parse one initialization-file line that defines a value pattern and
 * register it with add_pattern().
 *
 * We expect s[] to contain one of
 *
 *	key = "value"
 *	key : "value"
 *	key   "value"
 *	key = "value" "message"
 *	key : "value" "message"
 *	key   "value" "message"
 *
 * Empty lines are silently ignored.  Any other line is reported on
 * stdlog and terminates the program with EXIT_FAILURE.
 */
#if NEW_STYLE
static void
do_new_pattern(char *s)
#else /* K&R style */
static void
do_new_pattern(s)
char *s;
#endif /* NEW_STYLE */
{
    char *key;
    char *message;
    char *rest = s;			/* unparsed remainder of the line */
    YESorNO saw_space;
    char *value;

    key = get_token(rest,&rest,"=: \t\v\f");
    if (key == (char*)NULL)
	return;				/* then we have an empty line */

    /* Single-pass block: each `break' means the line is malformed */
    do
    {
	if (rest == (char*)NULL)	/* nothing follows the key */
	    break;

	saw_space = isspace(*rest) ? YES : NO;
	SKIP_SPACE(rest);
	if (!(saw_space || iskeyvalueseparator(*rest)))
	    break;			/* key runs straight into junk */

	if (iskeyvalueseparator(*rest))
	    ++rest;			/* then move past separator */
	SKIP_SPACE(rest);
	if (*rest != '"')		/* value must be quoted */
	    break;

	value = get_token(rest,&rest," \t\v\f");
	if (value == (char*)NULL)
	    break;

	SKIP_SPACE(rest);
	if (*rest == '"')		/* then have quoted message */
	{
	    message = get_token(rest,&rest," \t\v\f");
	    add_pattern(key,value,message);
	    return;
	}
	if ((*rest == '\0') || (*rest == COMMENT_PREFIX))
	{				/* have end of string s[] */
	    add_pattern(key,value,(char*)NULL);
	    return;
	}
    }
    while (0);

    (void)fprintf(stdlog,"%s Bad line [%s] in initialization file [%s]\n",
	ERROR_PREFIX, s, initialization_file_name);
    exit(EXIT_FAILURE);
}
/*
 * Process one complete input stream: reset the per-file state, then
 * alternately copy text that lies outside entries and process one
 * entry (Scribe or BibTeX syntax, according to the Scribe flag) until
 * end of file, and finally flush the output buffer.
 */
#if NEW_STYLE
static void
do_one_file(FILE *fp)			/* process one input file on fp */
#else /* K&R style */
static void
do_one_file(fp)				/* process one input file on fp */
FILE *fp;
#endif /* NEW_STYLE */
{
    fpin = fp;		/* save file pointer globally for get_char() */
    new_io_pair(&the_file);
    eofile = NO;

    for (new_entry(); eofile == NO; )
    {
	do_other();			/* text between entries */
	if (Scribe != YES)
	    do_BibTeX_entry();
	else
	    do_Scribe_entry();
    }

    out_flush();			/* flush all buffered output */
}
/*
 * Consume the delimiter that opens an entry body.  Either `{' or `('
 * is accepted; both are written as `{', and close_char records which
 * closing delimiter to expect.  Any other character is echoed and
 * reported as an error.  Nothing is done at end of file.
 */
static void
do_open_brace(VOID)		/* process open brace or parenthesis */
{
    int ch = get_next_non_blank();

    switch (ch)
    {
    case EOF:
	return;

    case '{':
	close_char = '}';
	out_c('{');
	break;

    case '(':
	close_char = ')';
	brace_level++;		/* get_char() could not do this for us */
	out_c('{');		/* standardize parenthesis to brace */
	break;

    default:			/* raise error and try to resynchronize */
	out_c(ch);
	out_with_error("",
	    "Expected open brace or parenthesis. Last entry = ``@%e{%t,''");
	break;
    }
}
/*
 * Copy input to output unchanged until end of file or until an `@'
 * appears as the first non-blank character of a line.  That `@' is
 * pushed back so that the entry parser sees it.
 */
static void
do_other(VOID)			/* copy non-BibTeX text verbatim */
{
    int ch;

    for (ch = get_char(); ch != EOF; ch = get_char())
    {
	if ((non_white_chars == 1) && (ch == '@'))
	{				/* new entry found */
	    put_back(ch);
	    return;
	}
	out_c(ch);
    }
}
/*
 * Preliminary scan of the command line for the few options that must
 * be known BEFORE initialization files are read.  Only the options in
 * the table below are acted upon here; matching is delegated to
 * apply_function().  (The integer in each table entry is passed through
 * to that routine unchanged; presumably it is the minimum abbreviation
 * length -- confirm against apply_function().)
 *
 * For each candidate switch the globals current_index, current_option
 * and next_option are set before the lookup.
 */
#if NEW_STYLE
static void
do_preargs(int argc, char *argv[])
#else /* K&R style */
static void
do_preargs(argc,argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    int k;
    static KEY_FUNCTION_ENTRY options[] =
    {
	{"no-print-patterns", 5, opt_print_patterns},
	{"no-read-init-files", 6, opt_read_init_files},
	{"no-trace-file-opening", 4, opt_trace_file_opening},
	{"print-patterns", 2, opt_print_patterns},
	{"read-init-files", 3, opt_read_init_files},
	{"trace-file-opening", 1, opt_trace_file_opening},
	{(const char*)NULL, 0, (void (*)(VOID))NULL},
    };

    for (k = 1; k < argc; ++k)
    {
	/* A switch is an option prefix followed by at least one more
	   character.  The prefix is tested FIRST so that argv[k][1] is
	   never examined for an empty-string argument, where it would
	   lie beyond the terminating NUL. */
	if ( isoptionprefix(argv[k][0]) && (argv[k][1] != '\0') )
	{				/* then process command-line switch */
	    current_index = k;
	    current_option = argv[k];
	    next_option = argv[k+1];	/* argv[argc] is the NULL sentinel */
	    (void)apply_function(current_option+1,options);
	}
    }
}
/*
 * Convert a Scribe block comment
 *
 *	@Begin{comment} ... @End{comment}
 *
 * into a BibTeX @Comment{...} group.  On entry "@Begin" has already
 * been written to the output; it is erased with DELETE_CHAR characters
 * before "@Comment{" is written.  Unmatched right braces inside the
 * comment are preceded by an inserted left brace so that the output
 * stays balanced.  If the text after @Begin is not "comment", it is
 * pushed back onto the input and nothing else is done.
 */
static void
do_Scribe_block_comment(VOID)
{
    int depth = 0;			/* brace level inside the comment */
    int ch;
    int n;
    char *word;				/* delimited word after @Begin/@End */
    char ahead[3+1];			/* lookahead, to hold "end" */

    word = get_Scribe_string();		/* expect to get "comment" */
    if (strnicmp(word,"\"comment\"",9) != 0)
    {					/* was not @Begin{comment} after all */
	put_back_string(word);
	return;
    }

    /* found start of @Begin{comment} */
    for (n = 0; n < 6; ++n)
	out_c(DELETE_CHAR);		/* delete "@Begin" from output */
					/* that was output by do_entry_name() */
    out_s("@Comment{");			/* convert to BibTeX `comment' */

    while ((ch = get_char()) != EOF)
    {
	if (ch == '@')			/* lookahead for "@End" */
	{
	    ahead[0] = (char)get_char();
	    ahead[1] = (char)get_char();
	    ahead[2] = (char)get_char();
	    ahead[3] = (char)'\0';
	    if (strnicmp(ahead,"end",3) != 0)
		put_back_string(ahead);	/* lookahead was NOT "@End" */
	    else
	    {				/* then we have @End */
		word = get_Scribe_string();	/* so get what follows */
		if (strnicmp(word,"\"Comment\"",9) == 0)
		{
		    out_c('}');	/* found @End{comment}, so finish
				   conversion to @Comment{...} */
		    return;	/* block comment conversion done! */
		}
		/* false alarm, just stuff lookahead back into input */
		put_back_string(word);
		put_back_string(ahead);
	    }
	}
	else if (ch == '{')
	    depth++;
	else if (ch == '}')
	{
	    if (depth <= 0)
		out_c('{');		/* keep output braces balanced */
	    else
		depth--;
	}
	out_c(ch);			/* copy one comment character */
    }
}
/*
 * Consume the delimiter that closes a Scribe entry.  The expected
 * character is close_char; it is written as `}'.  Any other character
 * is echoed and reported, showing both the expected and the actual
 * character together with their octal codes.
 */
static void
do_Scribe_close_delimiter(VOID)
{
    int ch;
    static char fmt[] = "Expected Scribe close delimiter `%c' [8#%03o], but \
found `%c' [8#%03o] instead for key ``%%k''";
    char msg[sizeof(fmt)];		/* expanded copy of fmt[] */

    ch = get_next_non_blank();
    if ((is_parbreak == YES) && (parbreaks == NO))
    {					/* paragraph break not permitted here */
	APPEND_CHAR(msg,0,ch);
	out_with_parbreak_error(msg);
	return;
    }

    if (ch == EOF)
	return;

    if (ch == close_char)
    {
	out_c('}');			/* standardize parenthesis to brace */
	return;
    }

    /* raise error and try to resynchronize */
    out_c(ch);
    (void)sprintf(msg, fmt, close_char, (unsigned int)close_char,
	(int)(isprint(ch) ? ch : '?'), (unsigned int)ch);
    out_with_error("", msg);
}
/*
 * Copy a Scribe @Comment group to the output as a braced group.
 * Copying ends at end of file or at the closing delimiter recorded in
 * close_char.  Braces inside the comment are forced to balance: an
 * unmatched `}' is preceded by an inserted `{', and any `{' still open
 * at the end is closed before the final `}'.
 */
static void
do_Scribe_comment(VOID)
{
    int ch;
    int depth = 0;			/* brace level inside the comment */

    do_Scribe_open_delimiter();		/* this outputs an opening brace */
    if (rflag == YES)
	return;

    while (((ch = get_char()) != EOF) && (ch != close_char))
    {
	switch (ch)
	{
	case '{':
	    depth++;
	    break;

	case '}':
	    if (depth > 0)
		depth--;
	    else
		out_c('{');	/* force matching internal braces */
	    break;

	default:
	    break;
	}
	out_c(ch);
    }

    while (depth > 0)
    {
	out_c('}');		/* force matching internal braces */
	depth--;
    }
    out_c('}');
}
static void
do_Scribe_entry(VOID)
{
/*************************************************************
Parse a Scribe entry, one of:
@entry-name{tag,key=value,key=value,...,}
@string{name=value}
@comment{...}
@begin{comment}...@end{comment}
The = separator in key/value pairs may also be a space or
a slash.
Any of the seven Scribe delimiters can be used to surround
the value(s) following @name, and to surround values of
key value pairs.
*************************************************************/
int save_close_char;
new_entry();
do_at();
if ((rflag == YES) || (eofile == YES)) return;
do_entry_name();
if (rflag == YES) return;
if (STREQUAL(current_entry_name,"Comment"))
do_Scribe_comment();
else if (STREQUAL(current_entry_name,"Begin"))
do_Scribe_block_comment();
else if (STREQUAL(current_entry_name,"String"))
do_group();
else
{
do_Scribe_open_delimiter();
if (rflag == YES) return;
save_close_char = close_char;
brace_level = 1; /* get_char() cannot do this for us */
do_tag_name();
if (rflag == YES) return;
do_comma();
if (rflag == YES) return;
while (do_key_value_pair() == YES)
NOOP;
if (rflag == YES) return;
close_char = save_close_char;
do_Scribe_close_delimiter();
}
flush_inter_entry_space();
}
/*
 * Consume a Scribe opening delimiter.  The character is looked up in
 * Scribe_open_delims[]; the entry at the same position in
 * Scribe_close_delims[] becomes close_char, and `{' is written to the
 * output.  A character that is not an opening delimiter is echoed and
 * reported as an error.  Nothing is done at end of file.
 */
static void
do_Scribe_open_delimiter(VOID)		/* process open delimiter */
{
    int ch;
    char *hit;				/* position in Scribe_open_delims[] */

    ch = get_next_non_blank();
    if (ch == EOF)
	return;

    hit = strchr(Scribe_open_delims,ch);
    if (hit != (char*)NULL)
    {
	close_char = Scribe_close_delims[(int)(hit - Scribe_open_delims)];
	out_c('{');		/* standardize open delimiter to brace */
	return;
    }

    out_c(ch);
    out_with_error("",
"Expected Scribe open delimiter, one of { [ ( < ' \" ` for key ``%k''");
}
/*
 * Consume the separator between a Scribe key and its value and write
 * the standard `=' in its place.  The separator may be `=', `/', or
 * plain white space; in the last case the first non-blank character
 * belongs to the value and is pushed back.  Anything else is echoed
 * and reported as an error.  Unless a paragraph break stops processing,
 * the output is finally padded out to column VALUE_INDENTATION.
 */
static void
do_Scribe_separator(VOID)
{
    int ch;
    YESorNO saw_space;

    the_value = the_file;		/* snapshot position for messages */

    /* Peek at one character, in case the separator is just a space */
    ch = get_char();
    put_back(ch);
    saw_space = isspace(ch) ? YES : NO;

    ch = get_next_non_blank();
    if ((is_parbreak == YES) && (parbreaks == NO))
    {					/* paragraph break not permitted here */
	char msg[2];
	APPEND_CHAR(msg,0,ch);
	out_with_parbreak_error(msg);
	return;
    }

    if (ch != EOF)
    {
	if ((ch == '=') || (ch == '/'))
	    out_equals();
	else if (saw_space == YES)
	{			/* have key value with no binary operator */
	    out_equals();	/* supply the missing = operator */
	    put_back(ch);	/* this is first character of value string */
	}
	else			/* looks like run-together keyvalue */
	{
	    out_c(ch);
	    out_with_error("",
"Expected Scribe separator \"=\", \"/\", or \" \" for key ``%k''");
	}
    }

    /* supply leading indentation */
    out_spaces((int)(VALUE_INDENTATION - the_file.output.column_position));
}
/***********************************************************************
Scribe value fields can take several forms, as illustrated by this
simple BNF grammar:
Scribe-value-string:
<open-delimiter><not-open-or-close-delimiter>*<close-delimiter> |
<digit><letter-or-digit-or-dot>*
***********************************************************************/
/*
 * Read one Scribe value with get_Scribe_string(), store it in
 * current_value[], and write it out.  The normal path goes through
 * out_value(); if rflag is raised or end of file was reached, the
 * text is written as is with out_s().
 */
static void
do_Scribe_value(VOID)			/* process Scribe value string */
{
    char *text;

    the_value = the_file;		/* snapshot position for messages */
    text = get_Scribe_string();
    (void)strcpy(current_value,text);

    if ((rflag != YES) && (eofile != YES))
	out_value();
    else
	out_s(current_value);
}
/*
 * Treat one initialization-file line as a command-line option.  The
 * line is split into at most two blank-separated tokens, which are
 * packaged as a small argv[] ("program" "-option" ["value"] NULL) and
 * handed to do_args().
 */
#if NEW_STYLE
static void
do_single_arg(char *s)
#else /* K&R style */
static void
do_single_arg(s)
char *s;
#endif /* NEW_STYLE */
{				/* expect -option or -option value */
    char *fake_argv[4];		/* "program" "-option" "value" NULL */
    int fake_argc = 3;		/* assume a value is present */

    fake_argv[0] = program_name;	/* 0th argument always program name */
    fake_argv[1] = get_token(s,&s," \t\v\f");	/* option */
    fake_argv[2] = get_token(s,&s," \t\v\f");	/* value */
    fake_argv[3] = (char *)NULL;

    if (fake_argv[2] == (char*)NULL)
	fake_argc = 2;			/* option had no value */

    do_args(fake_argc,fake_argv);
}
/*
 * Collect the citation tag into current_tag[].  The tag runs from the
 * next non-blank character up to, but not including, the first comma
 * or white-space character, which is pushed back.  The tag is checked
 * with check_tag() when check_values is YES, and then written out.
 */
static void
do_tag_name(VOID)			/* process BibTeX citation tag */
{
    int ch;				/* current input character */
    size_t len = 0;			/* characters stored so far */

    ch = get_next_non_blank();
    while ((ch != EOF) && (ch != ',') && !isspace(ch))
    {
	if (len >= MAX_TOKEN)
	{
	    APPEND_CHAR(current_tag,len,ch);
	    out_with_error(current_tag, "Citation tag too long");
	    return;
	}
	current_tag[len] = (char)ch;

	ch = get_char();
	len++;
    }
    current_tag[len] = (char)'\0';
    if (ch != EOF)
	put_back(ch);			/* terminator belongs to next token */

    if (check_values == YES)
	check_tag();
    out_s(current_tag);
    check_length(len);
}
#if NEW_STYLE
static void
enlarge_table(PATTERN_TABLE *table)
#else /* K&R style */
static void
enlarge_table(table)
PATTERN_TABLE *table;
#endif /* NEW_STYLE */
{
if (table->maximum_size == 0)
table->patterns = (MATCH_PATTERN*)malloc(sizeof(MATCH_PATTERN) *
TABLE_CHUNKS);
else
table->patterns = (MATCH_PATTERN*)realloc((char*)table->patterns,
sizeof(MATCH_PATTERN) * (table->maximum_size + TABLE_CHUNKS));
/* NB: Sun C++ requires (char*) cast */
if (table->patterns == (MATCH_PATTERN*)NULL)
fatal("Out of memory for pattern table space");
table->maximum_size += TABLE_CHUNKS;
}
/*
 * Report an error.  msg is expanded by format() and written twice:
 * once to stdlog in the UNIX "file", line N: text form, and once into
 * the output stream on a line of its own.  Each copy is followed by
 * out_status().  error_count is incremented, and at_level is reset to
 * zero to suppress further messages until input is resynchronized.
 *
 * msg is passed directly to format(), which reads it unconditionally.
 */
#if NEW_STYLE
static void				/* issue an error message */
error(const char *msg)			/* message with %-items for format() */
#else /* K&R style */
static void
error(msg)				/* issue an error message */
const char *msg;			/* message with %-items for format() */
#endif /* NEW_STYLE */
{
    char *text;				/* expanded message */

    error_count++;

    out_flush();			/* flush all buffered output */

    at_level = 0;			/* suppress further messages */
					/* until we have resynchronized */

    text = format(msg);

    /* UNIX-style error message for GNU Emacs M-x compile to parse */
    (void)fprintf(stdlog,"%s \"%s\", line %ld: %s.\n",
	ERROR_PREFIX,
	the_file.input.filename, the_value.input.line_number, text);
    out_status(stdlog, ERROR_PREFIX);
    (void)fflush(stdlog);

    /* Same message, embedded in the output stream */
    out_error(stdout, "\n");		/* make sure we start a newline */
    out_error(stdout, ERROR_PREFIX);
    out_error(stdout, " ");
    out_error(stdout, text);
    out_error(stdout, ".\n");
    out_status(stdout, ERROR_PREFIX);

    out_flush();			/* flush all buffered output */
}
/*
 * Write msg, preceded by ERROR_PREFIX, on stdlog and terminate the
 * program with EXIT_FAILURE.  This function does not return.
 */
#if NEW_STYLE
static void			/* issue an error message and die */
fatal(const char *msg)
#else /* K&R style */
static void
fatal(msg)			/* issue an error message and die */
const char *msg;
#endif /* NEW_STYLE */
{
    (void)fprintf(stdlog,"%s %s\n",
	ERROR_PREFIX, msg);
    exit(EXIT_FAILURE);
}
/*
 * Convert one personal name of the form "Smith, J.K." to "J.K. Smith",
 * in place, and return author.  Only the first comma at brace level 0
 * is acted upon; spacing of initials is left to fix_periods().
 *
 * The name is returned untouched when fix_names is NO, when it
 * contains a `(' (for example an address in parentheses), or when it
 * has no comma at brace level 0.
 */
#if NEW_STYLE
static char*			/* normalize author names and return */
fix_author(char *author)	/* new string from static space */
#else /* K&R style */
static char*
fix_author(author)		/* normalize author names and return */
char *author;			/* new string from static space */
#endif /* NEW_STYLE */
{
    size_t i;				/* index into author[] */
    int depth = 0;			/* brace level */
    char *given;			/* text that follows the comma */
    static char swapped[MAX_TOKEN_SIZE]; /* work area for reordered name */

    if (fix_names == NO)
	return (author);

    /* Leave untouched entries like: */
    /* author = "P. D. Q. Bach (113 MozartStrasse, Vienna, Austria)" */
    if (strchr(author,'(') != (char*)NULL)
	return (author);

    for (i = 0; author[i] != '\0'; ++i)
    {
	if (author[i] == '{')
	    depth++;
	else if (author[i] == '}')
	    depth--;
	else if ((author[i] == ',') && (depth == 0))
	{
	    given = &author[i+1];
	    while (isspace(*given))
		++given;			/* skip blanks after comma */

	    (void)strcpy(swapped,given);	/* "J.K." */
	    (void)strcat(swapped," ");		/* "J.K. " */
	    (void)strncat(swapped,author,i);	/* "J.K. Smith" */
	    return (strcpy(author,swapped));
	}
    }
    return (author);
}
/*
 * Replace a month name in current_value[] by the corresponding macro
 * from month_pair[], for better style-file customization.
 *
 * Two forms are recognized, both without regard to letter case:
 *   - the value matches the first strlen(current_value) characters of
 *     a month_pair[].old_name (e.g. "January" -> jan);
 *   - the value is exactly five characters, a quoted three-letter
 *     string whose letters match a month_pair[].new_name
 *     (e.g. "jan" -> jan).
 * The first matching table entry wins; a value that matches nothing is
 * left alone.
 */
static void
fix_month(VOID)			/* convert full month names to macros*/
{				/* for better style-file customization */
    int k;
    size_t n = strlen(current_value);

    for (k = 0; k < 12; ++k)
    {
	if (strnicmp(current_value,month_pair[k].old_name,n) == 0)
	{				/* change "January" to jan etc. */
	    (void)strcpy(current_value,month_pair[k].new_name);
	    break;
	}
	else if ((n == 5) &&
		 (current_value[0] == '"') &&
		 (current_value[4] == '"') &&
		 (strnicmp(&current_value[1],month_pair[k].new_name,3) == 0))
	{				/* change "jan" to jan etc. */
	    (void)strcpy(current_value,month_pair[k].new_name);
	    break;
	}
    }
}
/*
 * Normalize a quoted list of personal names held in current_value[],
 * leaving the result in current_value[].  For example
 *
 *	"Smith, J.K. and Brown, P.M."
 * becomes
 *	"J. K. Smith and P. M. Brown"
 *
 * The list is split at each " and " (and, in Scribe mode, at each
 * semicolon, which is replaced by " and "); every name is handed to
 * fix_author() and then fix_periods().
 *
 * Values that are not enclosed in quotation marks (they may be macros)
 * are left alone.  That includes values shorter than two characters,
 * which cannot hold both quotation marks; checking the length first
 * also keeps the index of the last character from wrapping around for
 * an empty value.
 */
static void
fix_namelist(VOID)		/* normalize list of personal names */
{				/* leaving it in global current_value[] */
    size_t len;			/* length of current_value[] */
    size_t m;			/* index of start of author in current_value[]*/
    size_t n;			/* length of current_value[], less 1 */
    char namelist[MAX_TOKEN_SIZE];	/* working copy of current_value[] */
    size_t v;			/* loop index into current_value[] */

    len = strlen(current_value);
    if (len < 2)			/* too short to be "..." */
	return;

    n = len - 1;			/* namelist = "\"...\"" */
    if ((current_value[0] != '"') ||
	(current_value[n] != '"'))	/* sanity check */
	return;				/* not quoted string, may be macro */

    (void)strcpy(namelist,"\"");	/* supply initial quotation mark */
    current_value[n] = (char)'\0';	/* clobber final quotation mark */

    for (v = 1, m = 1; v < n; ++v)	/* start past initial quotation mark */
    {
	if (strncmp(" and ",&current_value[v],5) == 0)
	{
	    current_value[v] = (char)'\0';	/* isolate current name */
	    (void)strcat(namelist,fix_periods(fix_author(&current_value[m])));
	    (void)strcat(namelist," and ");
	    current_value[v] = (char)' ';
	    v += 4;				/* skip rest of " and " */
	    m = v + 1;
	}
	else if ((Scribe == YES) && (current_value[v] == ';'))
	{				/* expand semicolons to " and " */
	    current_value[v] = (char)'\0';	/* isolate current name */
	    (void)strcat(namelist,fix_periods(fix_author(&current_value[m])));
	    (void)strcat(namelist," and ");
	    current_value[v] = (char)' ';
	    m = v + 1;
	}
    }

    /* handle last author */
    (void)strcat(namelist,fix_periods(fix_author(&current_value[m])));
    (void)strcat(namelist,"\"");	/* supply final quotation mark */
    (void)strcpy(current_value,namelist);
}
/*
 * Tidy page ranges in current_value[]: every run of one or more
 * hyphens becomes an en-dash (two hyphens), and white space on either
 * side of the run is removed.  All other characters are copied
 * unchanged.
 */
static void
fix_pages(VOID)
{
    size_t in;				/* index into current_value[] */
    size_t out = 0;			/* index into squeezed[] */
    char squeezed[MAX_TOKEN_SIZE];	/* rebuilt value */

    for (in = 0; current_value[in] != '\0'; ++in)
    {
	if (current_value[in] != '-')
	{				/* ordinary character */
	    squeezed[out++] = current_value[in];
	    continue;
	}

	/* Hyphen: convert the whole run to a single en-dash */
	while ((out > 0) && isspace(squeezed[out-1]))
	    --out;			/* discard preceding spaces */
	while (current_value[in+1] == '-')
	    ++in;			/* swallow rest of hyphen run */
	while (isspace(current_value[in+1]))
	    ++in;			/* discard following spaces */
	squeezed[out++] = (char)'-';	/* save an en-dash */
	squeezed[out++] = (char)'-';
    }
    squeezed[out] = (char)'\0';
    (void)strcpy(current_value,squeezed);
}
/*
 * Convert "J.K. Smith" to "J. K. Smith": a space is inserted after
 * each period at brace level 0 that stands between two upper-case
 * letters.  The result is built in shared_string and a pointer to it
 * is returned.  When fix_initials is NO, author itself is returned
 * unchanged.
 */
#if NEW_STYLE
static char*
fix_periods(char *author)
#else /* K&R style */
static char*
fix_periods(author)
char *author;
#endif /* NEW_STYLE */
{
    int depth = 0;			/* brace level */
    size_t in = 0;			/* index in author[] */
    size_t out = 0;			/* index in name[] */
    char *name = shared_string;		/* memory-saving device */

    if (fix_initials == NO)
	return author;

    /* The terminating NUL is copied too, and ends the loop */
    while ((name[out] = author[in]) != '\0')
    {
	switch (author[in])
	{
	case '{':
	    depth++;
	    break;

	case '}':
	    depth--;
	    break;

	case '.':
	    if ((depth == 0) && (in > 0) &&
		isupper(author[in-1]) && isupper(author[in+1]))
		name[++out] = (char)' '; /* supply space between initials */
	    break;

	default:
	    break;
	}
	++in;
	++out;
    }
    return (name);
}
/*
 * Protect upper-case acronyms in a quoted title held in
 * current_value[] by enclosing them in braces.  Outside of existing
 * braces, an upper-case letter starts a protected group when it is
 * followed by another upper-case letter or a digit (XY -> {XY},
 * X11 -> {X11}), or when it stands alone before a non-letter
 * ("The C book" -> "The {C} book").  A leading "A" followed by a
 * non-letter is left unbraced.  Values that do not begin with a
 * quotation mark are left alone.
 */
static void
fix_title(VOID)				/* protect upper-case acronyms */
{
    YESorNO protect;			/* brace the text at in? */
    int depth = 0;			/* brace level */
    size_t out = 0;			/* index into s[] */
    char *s = shared_string;		/* memory-saving device */
    size_t in = 0;			/* index into current_value[] */

    if (current_value[0] != '\"')
	return;				/* leave macros alone */

    while (current_value[in] != '\0')
    {
	if (current_value[in] == '{')
	{
	    depth++;
	    s[out++] = current_value[in++];
	    continue;
	}
	if (current_value[in] == '}')
	{
	    depth--;
	    s[out++] = current_value[in++];
	    continue;
	}

	/* Decide whether an acronym starts here */
	protect = NO;			/* default: copy unchanged */
	if ((depth <= 0) && isupper(current_value[in]))
	{				/* maybe brace <upper-case>+ */
					/* or <upper-case><digits> */
	    if (isupper(current_value[in+1]) || isdigit(current_value[in+1]))
		protect = YES;		/* XY -> {XY}, X11 -> {X11} */
	    else if (!isalpha(current_value[in+1]))
	    {
		if ((in == 1) && (current_value[in] == 'A'))
		    protect = NO;	/* "A gnat" -> "A gnat" */
		else
		    protect = YES;	/* "The C book" -> "The {C} book" */
	    }
	}

	if (protect)
	{			/* Convert XWS to {XWS} and X11 to {X11} */
	    s[out++] = (char)'{';
	    while (isupper(current_value[in]) || isdigit(current_value[in]))
		s[out++] = current_value[in++];
	    s[out++] = (char)'}';
	}
	else
	    s[out++] = current_value[in++];
    }
    s[out] = (char)'\0';
    check_length(out);
    (void)strcpy(current_value,s);
}
/*
 * Discard the white space that follows an entry and standardize it:
 * two newlines are written when more input follows (one blank line
 * between entries), and a single newline at end of file.  The
 * character that ended the white space is pushed back.
 */
static void
flush_inter_entry_space(VOID) /* standardize to 1 blank line between entries */
{
    int next;

    next = get_next_non_blank();
    put_back(next);

    out_c('\n');			/* always finish the current line */
    if (next != EOF)
	out_c('\n');			/* blank line before next entry */
}
/*
 * Expand the format items in msg[] and return a pointer to the
 * expanded copy, which is built in shared_string:
 *
 *	%e	current_entry_name
 *	%k	current_key
 *	%t	current_tag
 *	%v	current_value
 *	%%	a single %
 *
 * If msg[] contains an unknown format item (including a lone % at the
 * end), or if the expansion would not fit in MAX_TOKEN_SIZE characters
 * including the terminating NUL, an unexpanded and possibly truncated
 * copy of msg[] is returned instead.
 *
 * Every store is checked so that room always remains for the final
 * NUL.
 */
#if NEW_STYLE
static char*
format(const char *msg)
#else /* K&R style */
static char*
format(msg)
const char *msg;
#endif /* NEW_STYLE */
{
    size_t k;				/* index into msg[] */
    size_t len;				/* length of current item */
    size_t n;				/* index into newmsg[] */
    const char *item;			/* text that replaces a format item */
    char *newmsg = shared_string;	/* memory-saving device */

/* Shorthand for writable copy of msg[] with guaranteed NUL termination */
#define ORIGINAL_MESSAGE (strncpy(newmsg,msg,MAX_TOKEN_SIZE), \
newmsg[MAX_TOKEN_SIZE-1] = (char)'\0', newmsg)

    for (k = 0, n = 0; msg[k]; ++k)
    {
	if (msg[k] != '%')		/* ordinary character */
	{
	    if ((n + 1) >= MAX_TOKEN_SIZE)
		return (ORIGINAL_MESSAGE);	/* no space left */
	    newmsg[n++] = msg[k];
	    continue;
	}

	switch (msg[++k])		/* expect valid format item */
	{
	case 'e':			/* %e -> current_entry_name */
	    item = current_entry_name;
	    break;

	case 'k':			/* %k -> current_key */
	    item = current_key;
	    break;

	case 't':			/* %t -> current_tag */
	    item = current_tag;
	    break;

	case 'v':			/* %v -> current_value */
	    item = current_value;
	    break;

	case '%':			/* %% -> % */
	    item = "%";
	    break;

	default:			/* unknown or missing format item */
	    return (ORIGINAL_MESSAGE);
	}

	len = strlen(item);
	if ((n + len) >= MAX_TOKEN_SIZE)
	    return (ORIGINAL_MESSAGE);	/* no space left */
	(void)strcpy(&newmsg[n],item);
	n += len;
    }
    newmsg[n] = (char)'\0';		/* terminate string */
    return (newmsg);
}
/*
 * Read a braced string from the input and return it, converted to a
 * quoted string, in shared_string.
 *
 * Phase 1 collects characters until the brace level returns to zero
 * (or end of file).  Each run of white space is reduced to a single
 * blank.  A forbidden paragraph break or an overlong string is
 * reported and ends the function early.
 *
 * Phase 2 copies the text into a work area, wrapping every quotation
 * mark at brace level 1 in braces (x" -> x{"}, \"x -> {\"x}), and then
 * replaces the outermost braces with quotation marks.
 */
static char *
get_braced_string(VOID)
{
    int depth = 0;			/* brace level */
    int c;				/* current input character */
    size_t k = 0;			/* index into s[] */
    size_t n;				/* index into t[] */
    char *s = shared_string;		/* memory-saving device */
    char t[MAX_TOKEN_SIZE];		/* working area for braced string */

    /* Phase 1: collect the braced text */
    c = get_char();
    while (c != EOF)
    {
	if ((parbreaks == NO) && (is_parbreak == YES))
	{
	    APPEND_CHAR(s,k,c);
	    out_with_parbreak_error(s);
	    return (EMPTY_STRING(s));
	}
	if (k >= MAX_TOKEN)
	{
	    APPEND_CHAR(s,k,c);
	    out_with_error(s, "BibTeX string too long for key ``%k''");
	    return (EMPTY_STRING(s));
	}

	if (isspace(c))
	    c = ' ';		/* change whitespace to real space */
	else if (c == '{')
	    depth++;
	else if (c == '}')
	    depth--;
	s[k++] = (char)c;
	if (depth == 0)
	    break;			/* here's the loop exit */

	/* after a blank, skip the rest of the white-space run */
	if (isspace(c))
	    c = get_next_non_blank();
	else
	    c = get_char();
    }
    s[k] = (char)'\0';

    /* Phase 2: now convert braced string to quoted string */
    depth = 0;
    n = 0;
    for (k = 0; s[k]; ++k)
    {
	if (s[k] == '{')
	    depth++;
	else if (s[k] == '}')
	    depth--;

	if ((s[k] != '"') || (depth != 1))
	{
	    t[n++] = s[k];
	    continue;
	}

	/* Quotation mark at brace level 1; k > 0 whenever we get here,
	   so s[k-1] is safe to examine */
	if (s[k-1] == '\\')		/* change \"xy to {\"x}y */
	{
	    n--;			/* back up over the stored backslash */
	    t[n++] = (char)'{';
	    t[n++] = (char)'\\';
	    t[n++] = (char)'"';
	    t[n++] = s[++k];
	    t[n++] = (char)'}';
	}
	else				/* change x" to x{"} */
	{
	    t[n++] = (char)'{';
	    t[n++] = (char)'"';
	    t[n++] = (char)'}';
	}
    }
    t[0] = (char)'"';			/* change initial and final */
    APPEND_CHAR(t,n-1,'"');		/* braces to quotes */
    check_length(n);
    return (strcpy(s,t));
}
/*
 * Return the next input character from fpin, or EOF, and keep the
 * global bookkeeping up to date: byte, line and column positions in
 * the_file.input, the count of non-blank characters on the current
 * line (non_white_chars), the eofile flag, and brace_level.  A tab
 * advances the column to the next multiple of 8.
 */
static int
get_char(VOID)		/* all input is read through this function */
{
    int c;

    c = getc(fpin); /* NB: this is the ONLY place where input file is read! */

    the_file.input.byte_position++;

    /* Adjust global status and position values */
    if (c == EOF)
	eofile = YES;
    else
    {
	/* every real character first records the previous column */
	the_file.input.last_column_position = the_file.input.column_position;

	if (c == '\n')
	{
	    the_file.input.line_number++;
	    the_file.input.column_position = 0L;
	    non_white_chars = 0;
	}
	else if (c == '\t')
	    the_file.input.column_position =
		(the_file.input.column_position + 8L) & ~07L;
	else
	{
	    the_file.input.column_position++;
	    if (!isspace(c))
		non_white_chars++;
	}
    }

    switch (c)
    {
    case '{':
	brace_level++;
	break;

    case '}':
	brace_level--;
	break;

    default:
	break;
    }

#if defined(DEBUG)
    if (fpdebug)
	(void)fprintf(fpdebug,"[%c] %5ld %4ld %2ld\n",
		      c,
		      the_file.input.byte_position,
		      the_file.input.line_number,
		      the_file.input.column_position);
#endif /* defined(DEBUG) */

    return (c);
}
/*
 * Read a run of decimal digits from the input and return it in
 * shared_string, enclosed in quotation marks.  The character that ends
 * the run is pushed back.  An overlong run is reported as an error.
 */
static char *
get_digit_string(VOID)
{
    int c;				/* current input character */
    size_t k = 0;			/* index into s[] */
    char *s = shared_string;		/* memory-saving device */

    s[k++] = (char)'"';			/* convert to quoted string */

    c = get_char();
    while ((c != EOF) && isdigit(c))
    {
	if (k >= MAX_TOKEN)
	{
	    APPEND_CHAR(s,k,c);
	    out_with_error(s, "BibTeX string too long for key ``%k''");
	    return (EMPTY_STRING(s));
	}
	s[k++] = (char)c;
	c = get_char();
    }
    put_back(c);		/* we read past end of digit string */

    s[k++] = (char)'"';			/* supply terminating quote */
    s[k] = (char)'\0';
    check_length(k);
    return (s);
}
/*
 * Read a run of identifier characters (as defined by isidchar()) from
 * the input and return it in shared_string.  The character that ends
 * the run is pushed back.  An overlong run is reported as an error.
 */
static char *
get_identifier_string(VOID)
{
    int c;				/* current input character */
    size_t k = 0;			/* index into s[] */
    char *s = shared_string;		/* memory-saving device */

    c = get_char();
    while ((c != EOF) && isidchar(c))
    {
	if (k >= MAX_TOKEN)
	{
	    APPEND_CHAR(s,k,c);
	    out_with_error(s, "BibTeX string too long for key ``%k''");
	    return (EMPTY_STRING(s));
	}
	s[k++] = (char)c;
	c = get_char();
    }
    put_back(c);		/* we read past end of identifier string */

    s[k] = (char)'\0';
    check_length(k);
    return (s);
}
/*
 * Return one complete logical line from fp in a static buffer.
 * Physical lines that end in backslash-newline are joined with the
 * line that follows, with the backslash and newline discarded; the
 * final newline is discarded as well.  A line too long for the buffer
 * is returned in pieces.
 *
 * Returns (char*)NULL when nothing could be read because of end of
 * file or a read error.
 */
#if NEW_STYLE
static char*
get_line(FILE *fp)
#else /* K&R style */
static char*
get_line(fp)
FILE *fp;
#endif /* NEW_STYLE */
{
    static char line[MAX_LINE];		/* returned to caller */
    static char *p;			/* position of newline in segment */
    static char *more;			/* where the next segment goes */

    more = &line[0];
    line[0] = (char)'\0';		/* must set in case we hit EOF */

    while (fgets(more,(int)(&line[MAX_LINE] - more),fp) != (char *)NULL)
    {
	p = strchr(more,'\n');
	if (p == (char*)NULL)		/* no newline: return partial line */
	    break;

	*p = '\0';			/* zap final newline */

	/* Look for a backslash only inside the segment just read.  For
	   an empty segment p == more, and p-1 would lie outside it --
	   before the start of line[] on the first read. */
	if ((p > more) && (*(p-1) == '\\'))
	    more = p - 1;		/* backslash-newline: get another line */
	else				/* otherwise have normal newline */
	    break;			/* so return the current line */
    }

    /* An empty buffer means "no more input" at end of file, and also
       after a read error, which would otherwise yield empty lines
       forever. */
    if ((line[0] == '\0') && (feof(fp) || ferror(fp)))
	return ((char*)NULL);
    return (&line[0]);
}
/*
 * Skip whitespace on the input and return the first non-whitespace
 * character (or EOF).  As a side effect, is_parbreak is set to YES when
 * the skipped whitespace contained more than one newline or at least one
 * formfeed, and to NO otherwise.
 */
static int
get_next_non_blank(VOID)
{
    int newlines = 0;                   /* newlines seen while skipping */
    int formfeeds = 0;                  /* formfeeds seen while skipping */
    int c;

    for (c = get_char(); (c != EOF) && isspace(c); c = get_char())
    {
        if (c == '\n')
            newlines++;
        else if (c == '\f')
            formfeeds++;
    }
    is_parbreak = ((newlines > 1) || (formfeeds > 0)) ? YES : NO;
    return (c);
}
/* Read a double-quoted BibTeX string value from the input into */
/* shared_string and return it, including its surrounding quotes. */
/* Each run of whitespace is reduced to a single space.  An error */
/* (paragraph break when parbreaks == NO, or token buffer full) outputs */
/* the partial text with a message and returns EMPTY_STRING(s). */
static char *
get_quoted_string(VOID)
{
int b_level = 0; /* brace level */
int c; /* current input character */
size_t k; /* index into s[] */
char *s = shared_string; /* memory-saving device */
for (c = get_char(), k = 0; c != EOF; )
{
/* is_parbreak is the value left by the most recent call to */
/* get_next_non_blank() */
if ((parbreaks == NO) && (is_parbreak == YES))
{
APPEND_CHAR(s,k,c);
out_with_parbreak_error(s);
return (EMPTY_STRING(s));
}
else if (k >= MAX_TOKEN)
{
APPEND_CHAR(s,k,c);
out_with_error(s, "BibTeX string too long for key ``%k''");
return (EMPTY_STRING(s));
}
else
{
if (isspace(c))
c = ' '; /* change whitespace to real space */
else if (c == '{')
b_level++;
else if (c == '}')
b_level--;
s[k++] = (char)c;
/* (k > 1) excludes the opening quote, which is stored at s[0] */
if ((c == '"') && (k > 1) && (b_level == 0))
{
if (s[k-2] == '\\')
{
/* convert \"x inside string at brace-level 0 to {\"x}: */
/* illegal, but hand-entered bibliographies have it */
c = get_char();
if (c != EOF)
{
k = k - 2; /* back up over the stored \" pair */
s[k++] = (char)'{';
s[k++] = (char)'\\';
s[k++] = (char)'"';
s[k++] = (char)c;
s[k++] = (char)'}';
}
}
else
break; /* here's the loop exit */
}
/* after a space, skip the rest of the whitespace run */
c = isspace(c) ? get_next_non_blank() : get_char();
}
}
s[k] = (char)'\0';
check_length(k);
return (s);
}
/* Read a Scribe value string enclosed in one of the delimiter pairs */
/* given by Scribe_open_delims[] / Scribe_close_delims[], and return it */
/* in shared_string as a double-quoted string.  Whitespace runs are */
/* reduced to single spaces, and double quotes inside the value are */
/* wrapped in braces.  On error (no opening delimiter, paragraph break */
/* when parbreaks == NO, or token too long) the partial text is output */
/* with a message and EMPTY_STRING(s) is returned. */
static char *
get_Scribe_delimited_string(VOID)
{
int c;
int close_delim;
size_t k;
int last_c = EOF;
char *p;
char *s = shared_string; /* memory-saving device */
c = get_next_non_blank();
p = strchr(Scribe_open_delims,c); /* maybe delimited string? */
if (p == (char*)NULL)
{
APPEND_CHAR(s,0,c);
out_with_error(s,"Expected Scribe value string for key ``%k''");
return (EMPTY_STRING(s));
}
/* We have a delimited string */
/* the closing delimiter sits at the same index as the opening one */
close_delim = Scribe_close_delims[(int)(p - Scribe_open_delims)];
c = get_next_non_blank(); /* get first character in string */
/* ignoring leading space */
/* s[0] receives the opening quote; the loop stops at EOF, at a closing */
/* delimiter not preceded by a backslash, or when k reaches MAX_TOKEN */
for (k = 0, s[k++] = (char)'"';
(c != EOF) &&
!((last_c != '\\') && (c == close_delim)) &&
(k < MAX_TOKEN);
k++)
{
if ((parbreaks == NO) && (is_parbreak == YES))
{
APPEND_CHAR(s,k,c);
out_with_parbreak_error(s);
return (EMPTY_STRING(s));
}
if (c == '"') /* protect quotes inside string */
{
if (s[k-1] == '\\')
{ /* then TeX accent in Scribe string */
last_c = c;
c = get_char();
if (c == '{') /* change \"{ to {\" */
{
s[k-1] = (char)'{';
s[k] = (char)'\\';
s[++k] = (char)'"';
}
else /* change \". to {\".} (. = any) */
{
s[k-1] = (char)'{';
s[k] = (char)'\\';
s[++k] = (char)'"';
s[++k] = (char)c;
s[++k] = (char)'}';
}
}
else
{ /* bare quote becomes {"} */
s[k] = (char)'{';
s[++k] = (char)'"';
s[++k] = (char)'}';
}
}
else if (isspace(c))
s[k] = (char)' '; /* change whitespace to real space */
else
s[k] = (char)c;
last_c = c;
c = isspace(c) ? get_next_non_blank() : get_char();
}
APPEND_CHAR(s,k,'"'); /* append close delimiter */
if (k >= MAX_TOKEN)
{
out_with_error(s, "Scribe string too long for key ``%k''");
return (EMPTY_STRING(s));
}
check_length(k);
return (s);
}
/*
 * Read an undelimited Scribe identifier (after skipping leading
 * whitespace) and return it in shared_string as a double-quoted string.
 * The first character that is not an identifier character is pushed
 * back.  If the token buffer fills, the text is output with an error
 * message and EMPTY_STRING(s) is returned.
 */
static char *
get_Scribe_identifier_string(VOID)
{
    char *s = shared_string;            /* memory-saving device */
    size_t k;                           /* next free slot in s[] */
    int c;

    c = get_next_non_blank();
    s[0] = (char)'"';                   /* supply opening quote */
    k = 1;
    while (isidchar(c) && (k < MAX_TOKEN))
    {
        s[k] = (char)c;
        k++;
        c = get_char();
    }
    put_back(c);                        /* put back lookahead */
    APPEND_CHAR(s,k,'"');
    if (k >= MAX_TOKEN)
    {
        out_with_error(s, "Scribe number string too long for key ``%k''");
        return (EMPTY_STRING(s));
    }
    check_length(++k);
    return (s);
}
/*
 * Read a Scribe value: peek at the next non-blank character, push it
 * back, and dispatch to the identifier reader when it is an identifier
 * character, or to the delimited-string reader otherwise.
 */
static char *
get_Scribe_string(VOID)
{
    int c = get_next_non_blank();       /* one character of lookahead */

    put_back(c);
    if (isidchar(c))
        return (get_Scribe_identifier_string());
    return (get_Scribe_delimited_string());
}
/*
 * Read one simple BibTeX value.  The next non-blank character selects the
 * reader: '{' braced string, digit number, '"' quoted string, letter
 * identifier.  Anything else, or a paragraph break when parbreaks == NO,
 * is reported as an error and EMPTY_STRING(s) is returned.
 */
static char *
get_simple_string(VOID)
{
    char *s = shared_string;            /* memory-saving device */
    int c = get_next_non_blank();       /* peek ahead to next non-blank */

    if ((parbreaks == NO) && (is_parbreak == YES))
    {
        APPEND_CHAR(s,0,c);
        out_with_parbreak_error(s);
        return (EMPTY_STRING(s));
    }

    put_back(c);                        /* the chosen reader re-reads it */

    if (c == '{')
        return (get_braced_string());
    if (isdigit(c))
        return (get_digit_string());
    if (c == '"')
        return (get_quoted_string());
    if (isalpha(c))
        return (get_identifier_string());

    out_with_error("", "Expected BibTeX value string for key ``%k''");
    return (EMPTY_STRING(s));
}
#if NEW_STYLE
static char*
get_token(char *s, char **nextp, const char *terminators)
#else /* K&R style */
static char*
get_token(s,nextp,terminators)
char *s;
char **nextp;
const char *terminators;
#endif /* NEW_STYLE */
{
    char *t = s;
    char *token;

    /*******************************************************************
    Ignoring leading space, find the next token in s[], stopping at
    end-of-string, or one of the characters in terminators[],
    whichever comes first.  Replace the terminating character in s[]
    by a NUL.  Set *nextp to point to the next character in s[], or to
    (char*)NULL if end-of-string was reached.  Return (char*)NULL if
    no token was found, or else a pointer to its start in s[].  The
    job is terminated with an error message if a syntax error is
    detected.

    Quoted strings are recognized as valid tokens, and returned with
    their surrounding quotes removed, and embedded escape sequences
    expanded by do_escapes().  The comment character is recognized
    outside quoted strings, but not inside.

    A NULL s is accepted and treated like an empty string.
    *******************************************************************/

    if (t != (char*)NULL)       /* never hand a NULL pointer to SKIP_SPACE */
    {
        SKIP_SPACE(t);
    }

    if ((t == (char*)NULL) || (*t == '\0') || (*t == COMMENT_PREFIX))
    {                           /* initial sanity check */
        t = (char*)NULL;        /* save for *nextp later */
        token = (char*)NULL;
    }
    else if (*t == '"')         /* then collect quoted string */
    {
        token = ++t;            /* drop leading quote */
        for ( ; *t && (*t != '"'); ++t)
        {                       /* find ending quote */
            /* step over escape sequences; it doesn't matter if we have */
            /* \123, since we are only looking for the ending quote. */
            /* A backslash that is the last character of s[] must NOT */
            /* be stepped over, or the scan would run past the NUL. */
            if ((*t == '\\') && (*(t+1) != '\0'))
                ++t;
        }
        if (*t == '"')          /* then found valid string */
        {
            *t++ = '\0';        /* terminate token */
            do_escapes(token);  /* and expand escape sequences */
        }
        else                    /* unterminated quoted string */
        {
            (void)fprintf(stdlog,
                "%s Bad line [%s] in initialization file [%s]\n",
                ERROR_PREFIX, s, initialization_file_name);
            exit(EXIT_FAILURE);
        }
    }
    else                        /* else collect unquoted string */
    {
        for (token = t; *t && (*t != COMMENT_PREFIX) &&
            (strchr(terminators,*t) == (char*)NULL); ++t)
            NOOP;               /* scan over token */
        if ((*t == '\0') || (*t == COMMENT_PREFIX)) /* then hit end of s[] */
            t = (char*)NULL;    /* save for *nextp later */
        else                    /* else still inside s[] */
            *t++ = '\0';        /* terminate token */
    }
    *nextp = t;                 /* set continuation position */
    return (token);
}
#if NEW_STYLE
static int
isidchar(int c)
#else /* K&R style */
static int
isidchar(c)
int c;
#endif /* NEW_STYLE */
{
    /* Return non-zero if c may appear in an identifier.

       See LaTeX User's Guide and Reference Manual, Section B.1.3, for
       the rules of what characters can be used in a BibTeX word value.
       Section 30 of BibTeX initializes id_class[] to match this, but
       also allows ASCII DELete (0x7f) as an identifier character; that
       irregularity was reported to Oren Patashnik on [06-Oct-1990], and
       DELete is disallowed here (isgraph() rejects it in the C locale).

       The Scribe syntax is simpler: letters, digits, ., #, &, and %. */

    if (Scribe == YES)
    {
        return (isalnum(c) || (c == '.') || (c == '#') ||
                (c == '&') || (c == '%') );
    }
    else
    {
        return (isgraph(c) &&
                (strchr("\"#%'(),={}",c) == (char*)NULL));
    }
}
#if (OS_PCDOS && (SCREEN_LINES > 0))
#include <conio.h> /* needed for getch() declaration */
/* PC DOS: the screen height is the compile-time constant SCREEN_LINES. */
static int
get_screen_lines(VOID)
{
    return (SCREEN_LINES);
}
/* PC DOS: nothing to do when keyboard input is finished. */
static void
kbclose(VOID)
{
    /* intentionally empty */
}
/*
 * PC DOS: read one keystroke and translate it to a KEYCODE.  A first
 * byte of 0 or 0xe0 introduces a function key, whose second byte
 * selects the key; any other byte is looked up in keymap[].
 */
static KEYCODE
kbcode(VOID)
{
    int c = kbget();            /* get from keyboard without echo */

    if (c == EOF)
        return (KEY_EOF);
    if ((c != 0) && (c != 0xe0))        /* ordinary key */
        return (keymap[(unsigned)c]);

    /* IBM PC function key: the next byte is the function key code */
    switch (kbget())
    {
    case 71:
        return (KEY_HOME);      /* HOME */
    case 72:
        return (KEY_UP);        /* UP arrow */
    case 73:
        return (KEY_PGUP);      /* PGUP */
    case 79:
        return (KEY_END);       /* END */
    case 80:
        return (KEY_DOWN);      /* DOWN arrow */
    case 81:
        return (KEY_PGDN);      /* PGDN */
    default:
        return (KEY_UNKNOWN);
    }
}
/* PC DOS: fetch one raw keystroke via getch(). */
static int
kbget(VOID)
{
    int key = getch();

    return (key);
}
/* PC DOS: prepare for keyboard input; only the key map needs setting up. */
static void
kbopen(VOID)
{
    kbinitmap();
}
#endif /* (OS_PCDOS && (SCREEN_LINES > 0)) */
#if (OS_UNIX && (SCREEN_LINES > 0))
/* One of HAVE_SGTTY_H, HAVE_TERMIO_H, or HAVE_TERMIOS_H can be
defined at compile time. If more than one is set, we use the first
one set in that order. Usually, the UNIX_BSD or _POSIX_SOURCE values
are sufficient to distinguish between the three cases, and no
compile-time setting is necessary. DECstation ULTRIX has all three,
making it impossible to use symbols defined in sgtty.h, termio.h, and
termios.h to select code fragments below. */
#if !(defined(HAVE_SGTTY_H)||defined(HAVE_TERMIO_H)||defined(HAVE_TERMIOS_H))
#if UNIX_BSD
#define HAVE_SGTTY_H 1
#else /* NOT UNIX_BSD */
#if defined(_POSIX_SOURCE)
#define HAVE_TERMIOS_H 1
#else /* NOT BSD or POSIX, perhaps its AT&T System V */
#define HAVE_TERMIO_H 1
#endif /* defined(_POSIX_SOURCE) */
#endif /* UNIX_BSD */
#endif /* !(defined(HAVE_SGTTY_H) || defined(HAVE_TERMIO_H) ||
defined(HAVE_TERMIOS_H)) */
static void reset_terminal ARGS((void));
static void set_terminal ARGS((void));
static FILE *fptty = (FILE*)NULL; /* for kbxxx() functions */
static YESorNO tty_init = NO; /* set to YES if tty_save set */
static void
kbclose(VOID)
{
reset_terminal();
if (fptty != (FILE*)NULL)
(void)fclose(fptty);
}
/* UNIX: read one keystroke and translate it through keymap[]. */
static KEYCODE
kbcode(VOID)
{
    int c = kbget();

    return ((c == EOF) ? KEY_EOF : keymap[(unsigned)c]);
}
/*
 * UNIX: return the next character from the terminal stream, or EOF when
 * the terminal stream is not open.
 */
static int
kbget(VOID)
{
    if (fptty == (FILE*)NULL)
        return (EOF);

    (void)fflush(fptty);
    return (getc(fptty));
}
/*
 * UNIX: prepare for keyboard input.  Set up the key map, open /dev/tty
 * for reading, and if that succeeds switch the terminal to
 * character-at-a-time mode and record the current screen height.
 */
static void
kbopen(VOID)
{
    kbinitmap();
    fptty = tfopen("/dev/tty","r");
    if (fptty == (FILE*)NULL)
        return;                 /* no terminal: kbget() will return EOF */

    set_terminal();
    screen_lines = get_screen_lines();
}
#if defined(HAVE_SGTTY_H)
#undef HAVE_TERMIO_H
#undef HAVE_TERMIOS_H
#include <sgtty.h>
#include <sys/ioctl.h>
static struct sgttyb tty_save; /* Berkeley style interface */
/* Berkeley sgtty: put back the terminal modes saved by set_terminal(). */
static void
reset_terminal(VOID)
{
    if (tty_init != YES)
        return;                 /* nothing was saved */

    (void)ioctl((int)(fileno(fptty)),(int)TIOCSETP,(char*)&tty_save);
}
/*
 * Berkeley sgtty: save the current terminal modes in tty_save, then turn
 * off ECHO and LCASE and turn on CBREAK for character-at-a-time input.
 * If the modes cannot be read, the terminal is left untouched.
 */
static void
set_terminal(VOID)
{
    struct sgttyb tty;

    if (ioctl((int)(fileno(fptty)),(int)TIOCGETP,(char*)&tty) == -1)
        return;

    tty_save = tty;             /* remember modes for reset_terminal() */
    tty_init = YES;

    tty.sg_flags &= ~(ECHO | LCASE);
    tty.sg_flags |= CBREAK;
    (void)ioctl((int)(fileno(fptty)),(int)TIOCSETP,(char*)&tty);
}
#endif /* defined(HAVE_SGTTY_H) */
#if defined(HAVE_TERMIO_H)
#undef HAVE_SGTTY_H
#undef HAVE_TERMIOS_H
#include <termio.h>
static struct termio tty_save; /* SVID2 and XPG2 interface */
/* termio: put back the terminal modes saved by set_terminal(). */
static void
reset_terminal(VOID)
{
    if (tty_init != YES)
        return;                 /* nothing was saved */

    (void)ioctl((int)(fileno(fptty)),(int)TCSETAF,(char*)&tty_save);
}
/*
 * termio: save the current terminal modes in tty_save, then disable
 * input translation, flow control, echo, and canonical (line) mode.
 * If the modes cannot be read, the terminal is left untouched.
 */
static void
set_terminal(VOID)
{
    struct termio tty;          /* SVID2, XPG2 interface */

    if (ioctl((int)(fileno(fptty)),(int)TCGETA,(char*)&tty) == -1)
        return;

    tty_save = tty;             /* remember modes for reset_terminal() */
    tty_init = YES;

    tty.c_iflag &= ~(INLCR | ICRNL | ISTRIP | IXON | BRKINT);

#if defined(IUCLC)
    tty.c_iflag &= ~IUCLC;      /* absent from POSIX */
#endif /* defined(IUCLC) */

    tty.c_lflag &= ~(ECHO | ICANON);
    tty.c_cc[4] = 5;            /* MIN */
    tty.c_cc[5] = 2;            /* TIME */
    (void)ioctl((int)(fileno(fptty)),(int)TCSETAF,(char*)&tty);
}
#endif /* HAVE_TERMIO_H */
#if defined(HAVE_TERMIOS_H)
#undef HAVE_SGTTY_H
#undef HAVE_TERMIO_H
#include <termios.h>
static struct termios tty_save; /* XPG3, POSIX.1, FIPS 151-1 interface */
/* termios: put back the terminal modes saved by set_terminal(). */
static void
reset_terminal(VOID)
{
    if (tty_init != YES)
        return;                 /* nothing was saved */

    (void)tcsetattr((int)(fileno(fptty)),TCSANOW,&tty_save);
}
/*
 * termios: save the current terminal modes in tty_save, then disable
 * input translation, flow control, echo, and canonical (line) mode.
 * If the modes cannot be read, the terminal is left untouched.
 */
static void
set_terminal(VOID)
{
    struct termios tty;         /* XPG3, POSIX.1, FIPS 151-1 interface */

    if (tcgetattr((int)(fileno(fptty)),&tty) == -1)
        return;

    tty_save = tty;             /* remember modes for reset_terminal() */
    tty_init = YES;

    tty.c_iflag &= ~(INLCR | ICRNL | ISTRIP | IXON | BRKINT);

#if defined(IUCLC)
    tty.c_iflag &= ~IUCLC;      /* absent from POSIX */
#endif /* defined(IUCLC) */

    tty.c_lflag &= ~(ECHO | ICANON);
    tty.c_cc[VMIN] = 5;         /* MIN */
    tty.c_cc[VTIME] = 2;        /* TIME */
    (void)tcsetattr((int)(fileno(fptty)),TCSANOW,&tty);
}
#endif /* defined(HAVE_TERMIOS_H) */
/*
 * UNIX: return the number of lines on the screen.
 *
 * When TIOCGWINSZ is available, the terminal stream is asked for its
 * window size; the answer is used only if the ioctl() succeeds and
 * reports a positive row count.  Otherwise the ROWS or LINES environment
 * variable is consulted.  If neither source yields a positive value,
 * the compile-time default SCREEN_LINES is returned.
 *
 * This must come after the terminal header includes!
 */
static int
get_screen_lines(VOID)
{
#if defined(TIOCGWINSZ)
    struct winsize window_size;

    /* window_size is only meaningful when ioctl() succeeds, so its */
    /* return value must be checked before ws_row is examined. */
    if ((fptty != (FILE*)NULL) &&
        (ioctl((int)(fileno(fptty)),(int)TIOCGWINSZ,&window_size) != -1) &&
        (window_size.ws_row > 0))
        return ((int)window_size.ws_row);
#else /* defined(TIOCGWINSZ) */
    /* some systems store screen lines in environment variables */
    char *p;
    int n;

    if (((p = getenv("ROWS")) != (char*)NULL) ||
        ((p = getenv("LINES")) != (char*)NULL))
    {
        n = (int)atoi(p);
        if (n > 0)
            return (n);
    }
#endif /* defined(TIOCGWINSZ) */
    return (SCREEN_LINES);
}
#endif /* (OS_UNIX && (SCREEN_LINES > 0)) */
#if (OS_VAXVMS && (SCREEN_LINES > 0))
#include <ssdef.h>
#include <descrip.h>
#include <iodef.h>
#include <ttdef.h>
#include <tt2def.h>
#define TTYOPENFLAGS "rb"
#define TTYNAME ctermid((char*)NULL)
static int status; /* system service status */
static int tt_channel = -1; /* terminal channel for image QIO's */
static int iomask; /* QIO flag mask */
static $DESCRIPTOR(sys_in,"TT:"); /* terminal descriptor */
static struct
{
unsigned char class;
unsigned char type;
unsigned short buffer_size;
unsigned long tt;
unsigned long tt2;
} mode_buf,mode_save;
#define FAILED(status) (~(status) & 1) /* failure if LSB is 0 */
/*
 * VAX VMS: ask lib$screen_info() for the screen height; fall back to
 * SCREEN_LINES when no positive row count is obtained.
 */
static int
get_screen_lines(VOID)
{
    short flags;
    short dvtype;
    short ncols;
    short nrows = 0;            /* stays 0 if the call stores nothing */

    (void)lib$screen_info(&flags,&dvtype,&ncols,&nrows);
    if (nrows > 0)
        return ((int)nrows);
    return ((int)SCREEN_LINES);
}
/* VAX VMS: restore the terminal mode saved in mode_save by kbopen(). */
static void
kbclose(VOID)
{
    (void)sys$qiow(0,tt_channel,IO$_SETMODE,0,0,0,
                   &mode_save,12,0,0,0,0);
}
/* VAX VMS: read one keystroke and translate it through keymap[]. */
static KEYCODE
kbcode(VOID)
{
    int c = kbget();

    if (c == EOF)
        return (KEY_EOF);
    return (keymap[(unsigned)c]);
}
/*
 * VAX VMS: read a single character from the terminal channel with the
 * QIO function code held in iomask.  Returns the character (low 8 bits)
 * or EOF if the system service reports failure.
 */
static int
kbget(VOID)
{
    int c;

    status = sys$qiow(0,tt_channel,iomask,0,0,0,&c,1,0,0,0,0);
    if (FAILED(status))
        return ((int)EOF);
    return ((int)(c & 0xff));
}
/*
 * VAX VMS: prepare for keyboard input.  Set up the key map and assign a
 * channel to the terminal; on success, save the current terminal mode in
 * mode_save, clear the TT$M_WRAP bit in a working copy, apply it, and
 * select the read function code used by kbget().
 */
static void
kbopen(VOID)
{
    kbinitmap();
    status = sys$assign(&sys_in,&tt_channel,0,0);
    if (FAILED(status))
        return;                 /* no channel: leave terminal alone */

    (void)sys$qiow(0,tt_channel,IO$_SENSEMODE,0,0,0,&mode_save,12,0,0,0,0);
    mode_buf = mode_save;
    mode_buf.tt &= ~TT$M_WRAP;
    (void)sys$qiow(0,tt_channel,IO$_SETMODE,0,0,0,&mode_buf,12,0,0,0,0);
    iomask = IO$_TTYREADALL | IO$M_NOECHO;
}
#endif /* (OS_VAXVMS && (SCREEN_LINES > 0)) */
#if (SCREEN_LINES > 0)
/*
 * Initialize keymap[], the table that translates a keyboard character
 * into a paging command.  Every entry is first filled with KEY_UNKNOWN
 * (via memset), and then the recognized keys are assigned.
 */
static void
kbinitmap(VOID)
{
    (void)memset((void*)&keymap[0],(int)KEY_UNKNOWN,sizeof(keymap));

    /* page up */
    keymap[(unsigned)'b'] = KEY_PGUP;
    keymap[(unsigned)'B'] = KEY_PGUP;
    keymap[(unsigned)META('V')] = KEY_PGUP;     /* Emacs scroll-down */

    /* line down */
    keymap[(unsigned)'d'] = KEY_DOWN;
    keymap[(unsigned)'D'] = KEY_DOWN;
    keymap[(unsigned)CTL('N')] = KEY_DOWN;      /* Emacs next-line*/

    /* end of text */
    keymap[(unsigned)'e'] = KEY_END;
    keymap[(unsigned)'E'] = KEY_END;
    /* end-of-buffer must go to the END, not HOME as formerly coded */
    keymap[(unsigned)META('>')] = KEY_END;      /* Emacs end-of-buffer */

    /* page down */
    keymap[(unsigned)'f'] = KEY_PGDN;
    keymap[(unsigned)'F'] = KEY_PGDN;
    keymap[(unsigned)' '] = KEY_PGDN;
    keymap[(unsigned)'\r'] = KEY_PGDN;
    keymap[(unsigned)'\n'] = KEY_PGDN;
    keymap[(unsigned)CTL('V')] = KEY_PGDN;      /* Emacs scroll-up */

    /* help */
    keymap[(unsigned)'h'] = KEY_HELP;
    keymap[(unsigned)'H'] = KEY_HELP;
    keymap[(unsigned)'?'] = KEY_HELP;
    keymap[(unsigned)CTL('H')] = KEY_HELP;      /* Emacs help */

    /* quit */
    keymap[(unsigned)'\033'] = KEY_QUIT;        /* ESCape gets out */
    keymap[(unsigned)'q'] = KEY_QUIT;
    keymap[(unsigned)'Q'] = KEY_QUIT;

    /* redisplay */
    keymap[(unsigned)'.'] = KEY_AGAIN;
    keymap[(unsigned)'r'] = KEY_AGAIN;
    keymap[(unsigned)'R'] = KEY_AGAIN;
    keymap[(unsigned)CTL('L')] = KEY_AGAIN;     /* Emacs recenter */

    /* top of text */
    keymap[(unsigned)'t'] = KEY_HOME;
    keymap[(unsigned)'T'] = KEY_HOME;
    keymap[(unsigned)META('<')] = KEY_HOME;     /* Emacs beginning-of-buffer */

    /* line up */
    keymap[(unsigned)'u'] = KEY_UP;
    keymap[(unsigned)'U'] = KEY_UP;
    keymap[(unsigned)CTL('P')] = KEY_UP;        /* Emacs previous-line */
}
#endif /* (SCREEN_LINES > 0) */
#if NEW_STYLE
int
main(int argc, char *argv[])
#else /* K&R style */
int
main(argc,argv)
int argc;
char *argv[];
#endif /* NEW_STYLE */
{
    /* Program entry point: set up logging, process the early options, */
    /* read the system and user initialization files (unless disabled), */
    /* process the remaining options, and then clean each input file. */
    /* The exit status is EXIT_FAILURE if any errors were counted. */

#if defined(vms)
    extern char **cmd_lin();

    argv = cmd_lin( "", &argc, argv );
#endif /* defined(vms) */

    stdlog = stderr;    /* cannot assign at compile time on some systems */
    program_name = argv[0];
    check_inodes();

#if defined(DEBUG)
    fpdebug = tfopen("bibclean.dbg", "w");
#endif /* defined(DEBUG) */

    the_file.output.filename = "stdout";

    /* some args must be handled BEFORE initializations */
    do_preargs(argc,argv);

    /* The flag is tested separately before each file, as in the */
    /* original sequence of two independent tests. */
    if (read_initialization_files == YES)
        do_initfile(getenv(SYSPATH),INITFILE);
    if (read_initialization_files == YES)
        do_initfile(getenv(USERPATH),INITFILE);

    do_args(argc,argv);
    do_files(argc,argv);

#if OS_VAXVMS
    exit (error_count ? EXIT_FAILURE : EXIT_SUCCESS);
#endif /* OS_VAXVMS */

    return (error_count ? EXIT_FAILURE : EXIT_SUCCESS);
}
#if NEW_STYLE
static void
memmove(void *target, const void *source, size_t n)
#else /* K&R style */
static void
memmove(target, source, n)
void *target;
const void *source;
size_t n;
#endif /* NEW_STYLE */
{
    /* Private overlap-safe byte copy of n bytes from source to target. */
    /* Unlike the Standard C library function, nothing is returned. */
    char *dst = (char *)target;
    const char *src = (const char*)source;
    size_t i;

    if ((src <= dst) && (dst < (src + n)))
    {
        /* target starts inside source: copy from the high end down so */
        /* that no source byte is overwritten before it has been read */
        for (i = n; i > 0; --i)
            dst[i - 1] = src[i - 1];
    }
    else
    {
        /* no harmful overlap: plain low-to-high copy */
        for (i = 0; i < n; ++i)
            dst[i] = src[i];
    }
}
#if (defined(BSD) || defined(__SUNCC__))
#if NEW_STYLE
void*
memset(void *target, int value, size_t n)
#else /* K&R style */
void*
memset(target, value, n)
void *target;
int value;
size_t n;
#endif /* NEW_STYLE */
{
    /* Replacement supplied only when BSD or __SUNCC__ is defined: */
    /* store (unsigned char)value into each of the first n bytes of */
    /* target, and return target. */
    unsigned char *bytes = (unsigned char*)target;
    unsigned char fill = (unsigned char)value;
    size_t i;

    for (i = 0; i < n; ++i)
        bytes[i] = fill;
    return (target);
}
#endif /* (defined(BSD) || defined(__SUNCC__)) */
/*
 * Reset the per-entry parser state at the start of a new BibTeX
 * @name{...} entry: both nesting counters return to zero and the
 * resynchronization flag is cleared.
 */
static void
new_entry(VOID)
{
    rflag = NO;                 /* already synchronized */
    brace_level = 0;
    at_level = 0;
}
#if NEW_STYLE
static void
new_io_pair(IO_PAIR *pair)
#else /* K&R style */
static void
new_io_pair(pair)
IO_PAIR *pair;
#endif /* NEW_STYLE */
{
    /* Reset both halves of an input/output position pair to their */
    /* start-of-file values. */
    new_position(&(pair->input));
    new_position(&(pair->output));
}
#if NEW_STYLE
static void
new_position(POSITION *position)
#else /* K&R style */
static void
new_position(position)
POSITION *position;
#endif /* NEW_STYLE */
{
    /* Set a file position to the start of a file: byte 0, column 0, */
    /* line 1. */
    position->line_number = 1L;
    position->column_position = 0L;
    position->last_column_position = 0L;
    position->byte_position = 0L;
}
/*
 * Handle the -author option: write the author's name and address to
 * the error log stream (stdlog), without pausing.
 *
 * The FAX number is the one recorded in the file header of this source
 * file; the table formerly carried a transposed number (4801).
 */
static void
opt_author(VOID)
{
    static CONST char *author[] =
    {
        "Author:\n",
        "\tNelson H. F. Beebe\n",
        "\tCenter for Scientific Computing\n",
        "\tDepartment of Mathematics\n",
        "\tUniversity of Utah\n",
        "\tSalt Lake City, UT 84112\n",
        "\tUSA\n",
        "\tTel: +1 801 581 5254\n",
        "\tFAX: +1 801 581 4148\n",
        "\tEmail: <beebe@math.utah.edu>\n",
        (const char*)NULL,      /* end-of-table marker for out_lines() */
    };

    out_lines(stdlog, author, NO);
}
/* Handle -[no-]check-values: record the choice in check_values. */
static void
opt_check_values(VOID)
{
    check_values = YESorNOarg();
}
/* Handle -[no-]delete-empty-fields: record the choice in */
/* delete_empty_fields. */
static void
opt_delete_empty_fields(VOID)
{
    delete_empty_fields = YESorNOarg();
}
/*
 * Handle "-error-log filename": advance to the option's argument and
 * open it for writing as the new error log (stdlog).  If the file cannot
 * be opened, warn on stderr and log to stderr instead.
 */
static void
opt_error_log(VOID)
{
    current_index++;            /* step to the filename argument */
    stdlog = tfopen(next_option,"w");
    if (stdlog != (FILE*)NULL)
    {
        check_inodes();         /* stdlog changed */
        return;
    }

    fprintf(stderr, "%s cannot open error log file [%s]",
        WARNING_PREFIX, next_option);
    fprintf(stderr, " -- using stderr instead\n");
    stdlog = stderr;
}
/* Handle -[no-]file-position: record the choice in show_file_position. */
static void
opt_file_position(VOID)
{
    show_file_position = YESorNOarg();
}
/* Handle -[no-]fix-initials: record the choice in fix_initials. */
static void
opt_fix_initials(VOID)
{
    fix_initials = YESorNOarg();
}
/* Handle -[no-]fix-names: record the choice in fix_names. */
static void
opt_fix_names(VOID)
{
    fix_names = YESorNOarg();
}
/*
 * Handle -help: write the usage summary followed by the help text from
 * bibclean.h to stdlog, pausing between screens when screen_lines is
 * positive, and then exit successfully.
 */
static void
opt_help(VOID)
{
    static CONST char *help_lines[] =
    {
        "\nUsage: ",
        (const char*)NULL,      /* slot for program_name, filled below */
        " [ -author ] [ -error-log filename ] [ -help ] [ '-?' ]\n",
        "\t[ -init-file filename ] [ -[no-]check-values ]\n",
        "\t[ -[no-]delete-empty-fields ] [ -[no-]file-position ]\n",
        "\t[ -[no-]fix-initials ] [ -[no-]fix-names ]\n",
        "\t[ -[no-]par-breaks ] [ -[no-]print-patterns ]\n",
        "\t[ -[no-]read-init-files ] [ -[no-]remove-OPT-prefixes ]\n",
        "\t[ -[no-]scribe ] [ -[no-]trace-file-opening ] [ -[no-]warnings ]\n",
        "\t[ -version ]\n",
        "\t[ <infile or bibfile1 bibfile2 bibfile3 ...] >outfile\n",
        "\n",
#include "bibclean.h"
    };

    /* cannot have this in initializer */
    help_lines[1] = program_name;

    if (screen_lines > 0)
        out_lines(stdlog, help_lines, YES);
    else
        out_lines(stdlog, help_lines, NO);
    exit(EXIT_SUCCESS);
}
/* Handle "-init-file filename": advance to the option's argument and */
/* process it as an initialization file, with no search path. */
static void
opt_init_file(VOID)
{
    current_index++;            /* step to the filename argument */
    do_initfile((const char*)NULL,next_option);
}
/* Handle -[no-]par-breaks: record the choice in parbreaks. */
static void
opt_parbreaks(VOID)
{
    parbreaks = YESorNOarg();
}
/* Handle -[no-]print-patterns: record the choice in print_patterns. */
static void
opt_print_patterns(VOID)
{
    print_patterns = YESorNOarg();
}
/* Handle -[no-]read-init-files: record the choice in */
/* read_initialization_files. */
static void
opt_read_init_files(VOID)
{
    read_initialization_files = YESorNOarg();
}
/* Handle -[no-]remove-OPT-prefixes: record the choice in */
/* remove_OPT_prefixes. */
static void
opt_remove_OPT_prefixes(VOID)
{
    remove_OPT_prefixes = YESorNOarg();
}
/* Handle -[no-]scribe: record the choice in Scribe. */
static void
opt_scribe(VOID)
{
    Scribe = YESorNOarg();
}
/* Handle -[no-]trace-file-opening: record the choice in */
/* trace_file_opening. */
static void
opt_trace_file_opening(VOID)
{
    trace_file_opening = YESorNOarg();
}
/* Handle -version by delegating to version(). */
static void
opt_version(VOID)
{
    version();
}
/* Handle -[no-]warnings: record the choice in warnings. */
static void
opt_warnings(VOID)
{
    warnings = YESorNOarg();
}
static void
out_equals(VOID)
{
out_c(' ');
out_c('='); /* standardize to = */
out_c(' '); /* always surround = by spaces */
}
#if NEW_STYLE
static void
out_error(FILE *fpout, const char *s)
#else /* K&R style */
static void
out_error(fpout, s)
FILE *fpout;
const char *s;
#endif /* NEW_STYLE */
{
    /* Write message text s to fpout.  Text destined for stdout goes */
    /* through out_s() so that output positions continue to be tracked; */
    /* any other stream is written directly. */
    if (fpout != stdout)
    {
        (void)fputs(s,fpout);
        return;
    }
    out_s(s);
}
/* Flush buffered output by sending the magic value EOF to out_c(). */
static void
out_flush(VOID)
{
    out_c(EOF);
}
/* Output the current key name, preceded by KEY_INDENTATION spaces. */
static void
out_key(VOID)
{
    out_spaces(KEY_INDENTATION);
    out_s(current_key);
}
#if NEW_STYLE
static void
out_lines(FILE *fpout, const char *lines[], YESorNO pause)
#else /* K&R style */
static void
out_lines(fpout, lines, pause)
FILE *fpout;
const char *lines[];
YESorNO pause;
#endif /* NEW_STYLE */
{
/* Write the NULL-terminated array of strings lines[] to fpout.  When */
/* screen paging is compiled in (SCREEN_LINES > 0) and pause is YES, */
/* output stops after each screenful and do_more() determines the */
/* index of the line to continue from; otherwise all of the strings */
/* are written without stopping. */
int k;
#if (SCREEN_LINES > 0)
int lines_on_screen;
int nlines;
if (pause == YES)
{
kbopen();
for (nlines = 0; lines[nlines] != (const char*)NULL; ++nlines)
NOOP; /* count number of lines */
for (k = 0, lines_on_screen = 0; ; )
{
if (lines[k] != (const char*)NULL)
{
(void)fputs(lines[k], fpout);
if (strchr(lines[k],'\n') != (char*)NULL)
lines_on_screen++; /* some lines[k] are only partial */
}
/* pause when the screen is full (two lines are held back), or */
/* when the end of lines[] has been reached */
if ((lines_on_screen == (screen_lines - 2)) ||
(lines[k] == (const char*)NULL))
{ /* pause for user action */
lines_on_screen = 0;
screen_lines = get_screen_lines(); /* maybe window resize? */
k = do_more(fpout,k,screen_lines - 2);
if (k == EOF)
break; /* here's the loop exit */
else if (k == LAST_SCREEN_LINE)
k = nlines - (screen_lines - 2);
if (k < 0) /* ensure k stays in range */
k = 0;
else if (k >= nlines)
k = nlines - 1;
}
else /* still filling current screen */
k++;
} /* end for (k...) */
kbclose();
}
else /* pause == NO */
{
for (k = 0; lines[k] != (const char*)NULL; k++)
(void)fputs(lines[k], fpout);
}
#else /* NOT (SCREEN_LINES > 0) */
/* no paging support: pause is ignored */
for (k = 0; lines[k] != (const char*)NULL; k++)
(void)fputs(lines[k], fpout);
#endif /* (SCREEN_LINES > 0) */
}
#if NEW_STYLE
static void
out_position(FILE* fpout, const char *msg, IO_PAIR *the_location)
#else /* K&R style */
static void
out_position(fpout,msg,the_location)
FILE* fpout;
const char *msg;
IO_PAIR *the_location;
#endif /* NEW_STYLE */
{
    /* Write msg followed by the input and output byte, line, and */
    /* column positions recorded in *the_location to fpout, ending */
    /* with a newline. */

    /* The buffer is sized from a template of the longer (output) */
    /* message.  Each number field allows 20 characters, enough for */
    /* any 64-bit long including its sign; the former 10-character */
    /* fields could be overrun by sprintf() where long is wider than */
    /* 32 bits. */
    char s[sizeof(
    " output byte=XXXXXXXXXXXXXXXXXXXX line=XXXXXXXXXXXXXXXXXXXX column=XXXXXXXXXXXXXXXXXXXX\n")+1];

    out_error(fpout, msg);

    (void)sprintf(s," input byte=%ld line=%ld column=%2ld",
        the_location->input.byte_position,
        the_location->input.line_number,
        the_location->input.column_position);
    out_error(fpout, s);

    (void)sprintf(s, " output byte=%ld line=%ld column=%2ld\n",
        the_location->output.byte_position,
        the_location->output.line_number,
        the_location->output.column_position);
    out_error(fpout, s);
}
#if NEW_STYLE
static void
out_s(const char *s)    /* output a string, wrapping long lines */
#else /* K&R style */
static void
out_s(s)                /* output a string, wrapping long lines */
const char *s;
#endif /* NEW_STYLE */
{
    /* The string s[] has already had runs of whitespace of all kinds
       collapsed to single spaces.  The word_length() function returns 1
       more than the actual non-blank word length at end of string, so
       that we can automatically account for the comma that will be
       supplied after the string.

       A space is replaced by a line wrap when the following word would
       not fit; after certain punctuation characters the line is wrapped
       when the following word would not fit; everything else is output
       verbatim. */
    const char *p;

    for (p = s; *p != '\0'; ++p)
    {
        if (*p == ' ')
        {
            /* may change space to new line and indent */
            if ((the_file.output.column_position + 1 + word_length(p+1))
                > MAX_COLUMN)
                wrap_line();
            else
                out_c(*p);
        }
        else if (strchr("!&+,.:;=?", *p) != (char*)NULL)
        {
            /* may wrap after certain punctuation */
            out_c(*p);
            if ((the_file.output.column_position + word_length(p+1))
                > MAX_COLUMN)
                wrap_line();
        }
        else
            out_c(*p);
    }
}
#if NEW_STYLE
static void
out_spaces(int n)
#else /* K&R style */
static void
out_spaces(n)
int n;
#endif /* NEW_STYLE */
{
    /* Output n spaces; nothing is output when n is zero or negative. */
    while (n > 0)
    {
        out_c(' ');
        n--;
    }
}
#if NEW_STYLE
static void
out_status (FILE* fpout,const char *prefix)
#else /* K&R style */
static void
out_status(fpout,prefix)
FILE* fpout;
const char *prefix;
#endif /* NEW_STYLE */
{
    /* When show_file_position is YES, write a four-line report to */
    /* fpout, each line starting with prefix: the input and output file */
    /* names, then the positions of the current entry, the current */
    /* value, and the current file location. */
    if (show_file_position != YES)
        return;

    out_error(fpout, prefix);
    out_error(fpout, " File positions: input [");
    out_error(fpout, the_file.input.filename);
    out_error(fpout, "] output [");
    out_error(fpout, the_file.output.filename);
    out_error(fpout, "]\n");

    out_error(fpout, prefix);
    out_position(fpout, " Entry ", &the_entry);

    out_error(fpout, prefix);
    out_position(fpout, " Value ", &the_value);

    out_error(fpout, prefix);
    out_position(fpout, " Current", &the_file);
}
/*
 * Output current_value for current_key.
 *
 * The value is first trimmed, and any key-specific fix-up function from
 * fixes[] is applied.  When check_values is YES, the key-specific check
 * from checks[] is applied, or check_other() if the key has none.
 *
 * Then:
 *   - if OPT prefixes are being removed, the key is "OPTxxx", and the
 *     value is non-empty, the output line is deleted and rewritten with
 *     the OPT prefix stripped from the key;
 *   - otherwise, if empty fields are being deleted and the value is
 *     empty, the output line is deleted, discard_next_comma is set, and
 *     nothing more is output.
 */
static void
out_value(VOID)
{
    static KEY_FUNCTION_ENTRY checks[] =
    {
        {"author", 6, check_other},
        {"chapter", 7, check_chapter},
        {"ISBN", 4, check_ISBN},
        {"ISSN", 4, check_ISSN},
        {"month", 5, check_month},
        {"number", 6, check_number},
        {"pages", 5, check_pages},
        {"volume", 6, check_volume},
        {"year", 4, check_year},
        {(const char*)NULL, 0, (void (*)(VOID))NULL},
    };
    static KEY_FUNCTION_ENTRY fixes[] =
    {
        {"author", 6, fix_namelist},
        {"editor", 6, fix_namelist},
        {"month", 5, fix_month},
        {"pages", 5, fix_pages},
        {"title", 5, fix_title},
        {(const char*)NULL, 0, (void (*)(VOID))NULL},
    };

    trim_value();
    (void)apply_function(current_key,fixes);
    if (check_values == YES)
    {
        if (apply_function(current_key,checks) == NO)
            check_other();
    }

    if ((remove_OPT_prefixes == YES) &&
        (strncmp(current_key,"OPT",3) == 0) &&
        (strlen(current_key) > 3) &&
        (strlen(current_value) > 2)) /* 2, not 0: quotes are included! */
    {
        out_c(DELETE_LINE);
        /* reduce "OPTname" to "name"; the count includes the NUL */
        memmove(current_key,&current_key[3],(size_t)(strlen(current_key)-3+1));
        out_key();
        out_equals();
        out_spaces((int)(VALUE_INDENTATION - the_file.output.column_position));
    }
    else if ((delete_empty_fields == YES) && (strlen(current_value) <= 2))
    {                           /* 2, not 0, because quotes are included! */
        out_c(DELETE_LINE);
        discard_next_comma = YES;
        return;
    }

    out_s(current_value);
    check_length(strlen(current_value));
}
#if NEW_STYLE
static void
out_with_error(const char *s, const char *msg)
#else /* K&R style */
static void
out_with_error(s,msg)
const char *s;
const char *msg;
#endif /* NEW_STYLE */
{
    /* Output the text s collected so far, report msg through error(), */
    /* and then resynchronize the input with resync(). */
    out_s(s);
    error(msg);
    resync();
}
#if NEW_STYLE
static void
out_with_parbreak_error(char *s)
#else /* K&R style */
static void
out_with_parbreak_error(s)
char *s;
#endif /* NEW_STYLE */
{
out_with_error(s, "Unexpected paragraph break for key ``%k''");
}
#if NEW_STYLE
static void
prt_pattern(const char *keyname, const char *pattern, const char *message)
#else /* K&R style */
static void
prt_pattern(keyname,pattern,message)
const char *keyname;
const char *pattern;
const char *message;
#endif /* NEW_STYLE */
{
    /* When print_patterns is YES, log one line to stdlog describing a */
    /* pattern read from the current initialization file: */
    /*   - NULL or empty pattern: existing patterns were discarded; */
    /*   - NULL message: the key and pattern only; */
    /*   - otherwise: the key, pattern, and message. */
    /* Every field is written as name=[value]; the message field */
    /* formerly lacked its '='. */
    if (print_patterns != YES)
        return;

    if ((pattern == (const char*)NULL) || (*pattern == '\0'))
        (void)fprintf(stdlog,
            "\nfile=[%s] key=[%-12s] existing patterns discarded\n\n",
            initialization_file_name, keyname);
    else if (message == (const char*)NULL)
        (void)fprintf(stdlog,
            "file=[%s] key=[%-12s] pattern=[%s]\n",
            initialization_file_name, keyname, pattern);
    else
        (void)fprintf(stdlog,
            "file=[%s] key=[%-12s] pattern=[%s] message=[%s]\n",
            initialization_file_name, keyname, pattern, message);
}
#if NEW_STYLE
static void
put_back(int c) /* put last get_char() value back onto input stream */
#else /* K&R style */
static void
put_back(c)     /* put last get_char() value back onto input stream */
int c;
#endif /* NEW_STYLE */
{
    /* Push c back onto fpin with ungetc(), and adjust the input byte, */
    /* line, and column positions, the non-white character count, the */
    /* end-of-file flag, and the brace level accordingly. */
    ungetc(c,fpin);
    the_file.input.byte_position--;

    /* Adjust status values that are set in get_char() */
    if (!isspace(c))
        non_white_chars--;

    switch (c)
    {
    case EOF:
        eofile = NO;
        break;

    case '\n':                  /* back onto the previous line */
        the_file.input.column_position = the_file.input.last_column_position;
        the_file.input.line_number--;
        break;

    case '\t':                  /* tab width varies: use saved column */
        the_file.input.column_position = the_file.input.last_column_position;
        break;

    case '{':
        the_file.input.column_position--;
        brace_level--;
        break;

    case '}':
        the_file.input.column_position--;
        brace_level++;
        break;

    default:
        the_file.input.column_position--;
        break;
    }
}
#if NEW_STYLE
static void
put_back_string(const char *s)  /* put string value back onto input stream */
#else /* K&R style */
static void
put_back_string(s)              /* put string value back onto input stream */
const char *s;
#endif /* NEW_STYLE */
{
    /* Push the characters of s back onto the input, last character */
    /* first, so that they will be read again in their original order. */
    /* An empty string pushes nothing back. */
    size_t n;

    /* Each character is passed as an unsigned char value.  A plain */
    /* char may be negative for 8-bit text: that is not a valid */
    /* argument for the <ctype.h> test in put_back(), and the value -1 */
    /* would be taken for EOF.  Counting down with an index also */
    /* avoids forming a pointer before the start of s[]. */
    for (n = strlen(s); n > 0; --n)
        put_back((int)(unsigned char)s[n - 1]);
}
#if NEW_STYLE
static void             /* output c, but trim trailing blanks, */
put_char(int c)         /* and output buffer if c == EOF */
#else /* K&R style */
static void
put_char(c)             /* output c, but trim trailing blanks, */
int c;                  /* and output buffer if c == EOF */
#endif /* NEW_STYLE */
{
    /*******************************************************************
    Buffered character output to stdout, with output position tracking.

    c may be an ordinary character, or one of these special values:
        EOF             write the buffer to stdout and flush it
        DELETE_CHAR     remove the last buffered character
        DELETE_LINE     remove buffered characters back to, but not
                        including, the most recent newline

    Trailing spaces are removed from the buffer when a newline arrives.
    The buffer is also written out whenever it is full, after which
    earlier characters can no longer be deleted or trimmed.
    *******************************************************************/
    static int buf_length = 0;
    static char buf[MAX_BUFFER+1];      /* 1 extra slot for trailing NUL */

    the_file.output.byte_position++;
    if ((c == EOF) || (buf_length >= MAX_BUFFER))
    {
        buf[buf_length] = (char)'\0';
        if (buf_length > 0)
        {
            (void)fputs(buf,stdout);
            (void)fflush(stdout);
            buf_length = 0;
        }
        if (c == EOF)
            return;
    }
    switch (c)
    {
    case '\n':                  /* trim trailing spaces */
        the_file.output.line_number++;
        the_file.output.column_position = 0L;
        while ((buf_length > 0) && (buf[buf_length-1] == ' '))
        {
            the_file.output.byte_position--;
            buf_length--;
        }
        /* NOTE(review): this and the two cases below update the INPUT */
        /* last_column_position from an output routine; kept as found. */
        the_file.input.last_column_position =
            the_file.input.column_position - 1;
                                /* inexact if we trimmed tabs. */
        break;
    case '\t':
        the_file.input.last_column_position = the_file.input.column_position;
        the_file.output.column_position =
            (the_file.output.column_position + 8L) & ~07L;
        break;
    case DELETE_CHAR:           /* delete a character from the output */
        if (buf_length <= 0)    /* this should NEVER happen! */
            fatal("Internal error: too many output characters deleted");
        /* The character being deleted is the last one stored, at */
        /* buf[buf_length-1]; buf[buf_length] is an unused slot. */
        if (buf[buf_length-1] == '\n')
            the_file.output.line_number--;
        the_file.output.column_position--;      /* inexact if tab deleted */
        the_file.output.byte_position--;
        buf_length--;
        return;                 /* don't store this character! */
    case DELETE_LINE:           /* delete back to beginning of line */
        while ((buf_length > 0) && (buf[buf_length-1] != '\n'))
        {
            buf_length--;
            the_file.output.byte_position--;
        }
        the_file.output.column_position = 0;
        return;                 /* don't store this character! */
    default:
        the_file.input.last_column_position = the_file.input.column_position;
        the_file.output.column_position++;
        break;
    }                           /* end switch (c) */
    buf[buf_length++] = (char)c;
}
/*
 * Recover from a syntax error: set the resynchronization flag rflag and
 * let do_other() copy input to output until a new entry is found.
 */
static void
resync(VOID)
{
    rflag = YES;                /* mark that we are resynchronizing */
    do_other();                 /* copy text until new entry found */
}
#if NEW_STYLE
char*
strdup(const char *s)
#else /* K&R style */
char*
strdup(s)
const char *s;
#endif /* NEW_STYLE */
{
    /* Return a copy of s in newly allocated memory.  fatal() is called */
    /* with an out-of-memory message when the allocation fails. */
    size_t size = strlen(s) + 1;        /* text plus trailing NUL */
    char *copy = (char*)malloc(size);

    if (copy == (char*)NULL)
        fatal("Out of string memory");
    return (strcpy(copy,s));
}
#if NEW_STYLE
int
strnicmp(const char *s1, const char *s2, size_t n)
#else /* K&R style */
int
strnicmp(s1,s2,n)
const char *s1;
const char *s2;
size_t n;
#endif /* NEW_STYLE */
{
    int c1;
    int c2;

    /*******************************************************************
    Compare strings ignoring case, stopping after n characters, or at
    end-of-string, whichever comes first.

    Returns -1, 0, or 1 according to whether s1 sorts before, equal
    to, or after s2.  A string that is a proper prefix of the other
    sorts first.

    Each character is converted to unsigned char before being handed
    to the <ctype.h> functions, because their behavior is undefined
    for negative arguments other than EOF, and plain char may be
    signed.  tolower() is applied only to uppercase letters, since
    some pre-Standard libraries mishandle other arguments.
    *******************************************************************/

    for (; (n > 0) && *s1 && *s2; ++s1, ++s2, --n)
    {
        c1 = (int)(unsigned char)*s1;
        c2 = (int)(unsigned char)*s2;
        if (isupper(c1))
            c1 = 0xff & tolower(c1);
        if (isupper(c2))
            c2 = 0xff & tolower(c2);
        if (c1 < c2)
            return (-1);
        else if (c1 > c2)
            return (1);
    }
    if (n == 0)                 /* first n characters match */
        return (0);
    else if (*s1 == '\0')
        return ((*s2 == '\0') ? 0 : -1);
    else                        /* (*s2 == '\0') */
        return (1);
}
/* Open filename with the given mode through FOPEN() and return the
   resulting stream (NULL on failure).  When trace_file_opening is YES,
   a line naming the file, with ": FAILED" appended if the open did not
   succeed, is written to stdlog. */
#if NEW_STYLE
static FILE*
tfopen(const char *filename, const char *mode) /* traced file opening */
#else /* K&R style */
static FILE*
tfopen(filename,mode)
const char *filename;
const char *mode;
#endif /* NEW_STYLE */
{
    FILE *handle = FOPEN(filename,mode);

    if (trace_file_opening == YES)
    {
        const char *outcome = (handle == (FILE*)NULL) ? ": FAILED" : "";

        (void)fprintf(stdlog,"%s open file [%s]%s\n",
                      WARNING_PREFIX, filename, outcome);
    }
    return (handle);
}
/* Trim leading and trailing white space inside the quotes of the
   string held in current_value[], in place.

   Leading:  if the value starts with a quotation mark followed by
             white space, the text after the white space is moved down
             to sit directly after the opening quote.
   Trailing: if the value ends with a quotation mark, white space just
             before it is removed and the closing quote and NUL are
             rewritten.

   An empty current_value[] is left untouched. */
static void
trim_value(VOID)
{
    size_t k;
    size_t n = strlen(current_value);

    /* isspace() is only defined for unsigned char values (or EOF), so
       every character is converted before it is classified. */
    if ((current_value[0] == '"') &&
        isspace((unsigned char)current_value[1]))
    {                   /* then quoted string value with leading space */
        for (k = 1; (k < n) && isspace((unsigned char)current_value[k]); ++k)
            NOOP;
        /* regions overlap, hence memmove(); the count includes the
           trailing NUL */
        memmove(&current_value[1], &current_value[k], (size_t)(n + 1 - k));
        n = strlen(current_value);
    }

    /* n is unsigned: without the (n > 0) test an empty value would
       index current_value[] far out of bounds. */
    if ((n > 0) && (current_value[n-1] == '"'))
    {
        /* k-2 is the last character before the closing quote */
        for (k = n; (k > 1) && isspace((unsigned char)current_value[k-2]); --k)
            NOOP;
        current_value[k-1] = (char)'"';
        current_value[k] = (char)'\0';
    }
}
/* Issue the stock "unexpected value" warning through warning().
   The %k and %v items in the message are not printf() conversions:
   warning() hands its message to format() before printing, which
   presumably substitutes the current key and value -- confirm against
   format(). */
static void
unexpected(VOID)
{
warning("Unexpected value in ``%k = %v''");
}
/* Print the version banner followed by the command-line synopsis on
   stdlog. */
static void
usage(VOID)
{
    /* Slot 1 is a placeholder that is filled in with program_name at
       run time; the final NULL entry ends the table. */
    static CONST char *synopsis[] =
    {
        "\nUsage: ",
        (const char*)NULL,
        " [ -author ] [ -error-log filename ] [ -help ] [ '-?' ]\n",
        "\t[ -init-file filename ] [ -[no-]check-values ]\n",
        "\t[ -[no-]delete-empty-fields ] [ -[no-]file-position ]\n",
        "\t[ -[no-]fix-initials ] [ -[no-]fix-names ]\n",
        "\t[ -[no-]par-breaks ] [ -[no-]print-patterns ]\n",
        "\t[ -[no-]read-init-files ] [ -[no-]remove-OPT-prefixes ]\n",
        "\t[ -[no-]scribe ] [ -[no-]trace-file-opening ] [ -[no-]warnings ]\n",
        "\t[ -version ]\n",
        "\t[ <infile or bibfile1 bibfile2 bibfile3 ...] >outfile\n",
        (const char*)NULL,
    };

    version();                          /* banner comes first */

    /* program_name is not a constant expression, so it cannot appear
       in the static initializer above */
    synopsis[1] = program_name;

    out_lines(stdlog, synopsis, NO);
}
/* Write the program version, and whatever build information was
   available at compile time, to stdlog via out_lines().

   The text is assembled as a NULL-terminated table of string
   fragments; which fragments are present is decided entirely by the
   preprocessor.  Note that the "with ... pattern matching" lines are
   nested inside the outer HOST/USER/__DATE__/__TIME__ conditional, so
   they appear only when at least one of those symbols is defined. */
static void
version(VOID)
{
static CONST char *version_string[] =
{
BIBCLEAN_VERSION,
"\n",
/* everything from here to the matching #endif is build information */
#if defined(HOST) || defined(USER) || defined(__DATE__) || defined(__TIME__)
"Compiled",
#if defined(USER)
/* " by <user>" or " by <user@host>"; HOST is used only if USER is set */
" by <", USER,
#if defined(HOST)
"@", HOST,
#endif /* defined(HOST) */
">",
#endif /* defined(USER) */
#if defined(__DATE__)
" on ", __DATE__,
#endif /* defined(__DATE__) */
#if defined(__TIME__)
" ", __TIME__,
#endif /* defined(__TIME__) */
/* one line per pattern-matching facility that was compiled in */
#if defined(HAVE_PATTERNS)
"\nwith native pattern matching",
#endif /* defined(HAVE_PATTERNS) */
#if defined(HAVE_RECOMP) || defined(HAVE_REGEXP)
"\nwith regular-expression pattern matching",
#endif /* defined(HAVE_RECOMP) || defined(HAVE_REGEXP) */
#if defined(HAVE_OLDCODE)
"\nwith old matching code",
#endif /* defined(HAVE_OLDCODE) */
"\n",
#endif /* defined(HOST)||defined(USER)||defined(__DATE__)||defined(__TIME__) */
(const char*)NULL, /* end-of-table marker */
};
out_lines(stdlog, version_string, NO);
}
/* Report a warning on stdlog, unless warnings have been switched off.
   The message text is passed through format() before printing and is
   prefixed with WARNING_PREFIX, the input file name, and the input
   line number recorded in the_value.  A status report from
   out_status() follows, and stdlog is flushed. */
#if NEW_STYLE
static void
warning(const char *msg) /* issue a warning message to stdlog */
#else /* K&R style */
static void
warning(msg) /* issue a warning message to stdlog */
const char *msg;
#endif /* NEW_STYLE */
{
    const char *lead;

    if (warnings != YES)                /* warnings suppressed */
        return;

    out_flush();                        /* flush all buffered output */

    /* Warnings often arrive in mid-line, so begin a fresh line when
       stdlog and stdout are the same file. */
    lead = (stdlog_on_stdout == YES) ? "\n" : "";

    (void)fprintf(stdlog,"%s%s \"%s\", line %ld: %s.\n",
                  lead,
                  WARNING_PREFIX, the_file.input.filename,
                  the_value.input.line_number, format(msg));
    out_status(stdlog, WARNING_PREFIX);
    (void)fflush(stdlog);
}
/* Return the length of the leading run of non-white-space characters
   in the NUL-terminated string s.

   If that run extends to the end of the string, the value returned is
   ONE MORE than the true length; this simplifies the line-wrapping
   arithmetic.  An empty string therefore yields 1, and a string that
   begins with white space yields 0. */
#if NEW_STYLE
static int
word_length(const char *s) /* return length of leading non-blank prefix */
#else /* K&R style */
static int
word_length(s) /* return length of leading non-blank prefix */
const char *s;
#endif /* NEW_STYLE */
{
    size_t n;

    for (n = 0; s[n] != '\0'; ++n)
    {
        /* isspace() is only defined for unsigned char values (or
           EOF); plain char may be negative for 8-bit characters */
        if (isspace((unsigned char)s[n]))
            break;
    }
    return ((int)((s[n] == '\0') ? n + 1 : n));
                                /* at end of string, return one more than */
                                /* true length to simplify line wrapping */
}
/* Break the current output line: emit a newline through out_c() and
   then VALUE_INDENTATION spaces through out_spaces(), so that the text
   which follows starts at the value indentation. */
static void
wrap_line(VOID) /* insert a new line and leading indentation */
{
out_c('\n'); /* end the current output line */
out_spaces(VALUE_INDENTATION); /* supply leading indentation */
}
/* Decide whether the option in current_option is the negated form.
   The first character is skipped; if the next three characters are
   "no-" (compared without regard to letter case) the result is NO,
   otherwise YES. */
static YESorNO
YESorNOarg(VOID)
{
    if (strnicmp(current_option+1,"no-",3) == 0)
        return (NO);
    return (YES);
}
/***********************************************************************
We put this regular expression matching code last because
(a) it is not universally available,
(b) the 6 macros in the HAVE_REGEXP section can only be defined
once, and
(c) there are three variants: the old ugly regexp.h interface (HAVE_REGEXP),
the new clean regex.h interface (HAVE_RECOMP), and the GNU version
(not yet supported here)
***********************************************************************/
/**********************************************************************/
#if defined(HAVE_RECOMP)
#if (_AIX || ultrix)
/* AIX 370, AIX PS/2, and ULTRIX have these, but no regex.h, sigh... */
/* So declare re_comp() and re_exec() by hand on those systems, with C
   linkage when this file is compiled as C++. */
#if __cplusplus
extern "C" {
#endif /* __cplusplus */
char *re_comp ARGS((const char *s_));
int re_exec ARGS((const char *s_));
#if __cplusplus
};
#endif /* __cplusplus */
#else /* NOT (_AIX || ultrix) */
/* everywhere else the declarations come from the system header */
#include <regex.h>
#endif /* (_AIX || ultrix) */
/* Match string against the regular expression pattern using the
   re_comp()/re_exec() interface.

   Returns YES when re_exec() reports 1 and NO when it reports 0.
   A non-NULL result from re_comp(), or any other result from
   re_exec(), is treated as an internal error and reported through
   fatal(). */
#if NEW_STYLE
static int
match_regexp(const char *string,const char *pattern)
#else /* K&R style */
static int
match_regexp(string,pattern)
const char *string;
const char *pattern;
#endif /* NEW_STYLE */
{
    int status;

    if (re_comp(pattern) != (char*)NULL)
        fatal("Internal error: bad regular expression");

    status = re_exec(string);
    if (status == 1)
        return (YES);
    if (status == 0)
        return (NO);

    fatal("Internal error: bad regular expression");
    return (YES);                       /* keep optimizers happy */
}
#endif /* defined(HAVE_RECOMP) */
/**********************************************************************/
#if defined(HAVE_REGEXP)
/* Hooks for the <regexp.h> interface, which is included further down
   and therefore must find these macros already defined.  Their roles
   below follow the regexp(5) description of compile() -- confirm
   against the local manual page. */

/* Starting point of the text that compile() reads; match_regexp()
   sets it just before calling compile(). */
const char *sp_global;
/* abnormal exit from compile(): report the error via regerr() */
#define ERROR(c) regerr()
/* fetch the next input character and advance */
#define GETC() (*sp++)
/* declarations placed at the top of compile(): a private cursor sp,
   started at sp_global */
#define INIT const char *sp = sp_global;
/* look at the next input character without advancing */
#define PEEKC() (*sp)
/* normal exit from compile() */
#define RETURN(c) return(c)
/* push the last character back by stepping the cursor backwards */
#define UNGETC(c) (--sp)
/* Error handler named by the ERROR() macro defined for <regexp.h>:
   report a bad regular expression as an internal error through
   fatal(). */
void
regerr(VOID)
{
fatal("Internal error: bad regular expression");
}
#include <regexp.h>
/* Match string against the regular expression pattern using the
   compile()/step() interface of <regexp.h>.

   Arguments:
       string    the text to be tested
       pattern   the regular expression

   Returns YES if step() finds a match in string, NO otherwise.  A
   malformed pattern is reported by compile() through the ERROR()
   macro, i.e. regerr(). */
#if NEW_STYLE
static int
match_regexp(const char *string,const char *pattern)
#else /* K&R style */
static int
match_regexp(string,pattern)
const char *string;
const char *pattern;
#endif /* NEW_STYLE */
{
    char expbuf[MAX_TOKEN_SIZE];        /* receives the compiled expression */

    /* compile() does not read its first argument directly: it fetches
       the regular expression one character at a time through GETC()
       and PEEKC(), which walk the cursor that INIT starts at
       sp_global.  sp_global must therefore address the PATTERN;
       pointing it at the subject string would compile the subject
       itself as the expression and make almost every match succeed. */
    sp_global = pattern;
    (void)compile((char*)pattern, (char*)expbuf,
                  (char*)(expbuf + sizeof(expbuf)), '\0');
    return (step((char*)string,(char*)expbuf) ? YES : NO);
}
#endif /* defined(HAVE_REGEXP) */
/**********************************************************************/